diff --git a/bolt/include/bolt/Passes/Hugify.h b/bolt/include/bolt/Passes/Hugify.h new file mode 100644 index 0000000000000..0a7734059121c --- /dev/null +++ b/bolt/include/bolt/Passes/Hugify.h @@ -0,0 +1,29 @@ +//===- bolt/Passes/Hugify.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_PASSES_HUGIFY_H +#define BOLT_PASSES_HUGIFY_H + +#include "bolt/Passes/BinaryPasses.h" + +namespace llvm { +namespace bolt { + +class HugePage : public BinaryFunctionPass { +public: + HugePage(const cl::opt &PrintPass) : BinaryFunctionPass(PrintPass) {} + + void runOnFunctions(BinaryContext &BC) override; + + const char *getName() const override { return "HugePage"; } +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h b/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h index 4889708b13a3b..e9357d1c36153 100644 --- a/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h +++ b/bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h @@ -22,13 +22,11 @@ class HugifyRuntimeLibrary : public RuntimeLibrary { public: /// Add custom section names generated by the runtime libraries to \p /// SecNames. 
- void addRuntimeLibSections(std::vector &SecNames) const final { - SecNames.push_back(".bolt.hugify.entries"); - } + void addRuntimeLibSections(std::vector &SecNames) const final {} void adjustCommandLineOptions(const BinaryContext &BC) const final; - void emitBinary(BinaryContext &BC, MCStreamer &Streamer) final; + void emitBinary(BinaryContext &BC, MCStreamer &Streamer) final {} void link(BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld, std::function OnLoad) final; diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index b7cca813bdec7..7b654f19f6d45 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -44,6 +44,7 @@ extern llvm::cl::opt HeatmapMinAddress; extern llvm::cl::opt HotData; extern llvm::cl::opt HotFunctionsAtEnd; extern llvm::cl::opt HotText; +extern llvm::cl::opt Hugify; extern llvm::cl::opt Instrument; extern llvm::cl::opt OutputFilename; extern llvm::cl::opt PerfData; diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index c81dcc31f33ba..fe02af0b85a94 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -740,10 +740,12 @@ void BinaryEmitter::emitJumpTables(const BinaryFunction &BF) { for (auto &JTI : BF.jumpTables()) { JumpTable &JT = *JTI.second; + // Only emit shared jump tables once, when processing the first parent + if (JT.Parents.size() > 1 && JT.Parents[0] != &BF) + continue; if (opts::PrintJumpTables) JT.print(outs()); - if ((opts::JumpTables == JTS_BASIC || !BF.isSimple()) && - BC.HasRelocations) { + if (opts::JumpTables == JTS_BASIC && BC.HasRelocations) { JT.updateOriginal(); } else { MCSection *HotSection, *ColdSection; diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index 7f67261a4e04b..6a0638ba3aa74 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_library(LLVMBOLTPasses 
FrameOptimizer.cpp HFSort.cpp HFSortPlus.cpp + Hugify.cpp IdenticalCodeFolding.cpp IndirectCallPromotion.cpp Inliner.cpp diff --git a/bolt/lib/Passes/Hugify.cpp b/bolt/lib/Passes/Hugify.cpp new file mode 100644 index 0000000000000..170fb5bda349f --- /dev/null +++ b/bolt/lib/Passes/Hugify.cpp @@ -0,0 +1,50 @@ +//===--- bolt/Passes/Hugify.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "bolt/Passes/Hugify.h" +#include "llvm/Support/CommandLine.h" + +#define DEBUG_TYPE "bolt-hugify" + +using namespace llvm; + +namespace llvm { +namespace bolt { + +void HugePage::runOnFunctions(BinaryContext &BC) { + auto *RtLibrary = BC.getRuntimeLibrary(); + if (!RtLibrary || !BC.isELF() || !BC.StartFunctionAddress) { + return; + } + + auto createSimpleFunction = + [&](std::string Title, std::vector Instrs) -> BinaryFunction * { + BinaryFunction *Func = BC.createInjectedBinaryFunction(Title); + + std::vector> BBs; + BBs.emplace_back(Func->createBasicBlock(nullptr)); + BBs.back()->addInstructions(Instrs.begin(), Instrs.end()); + BBs.back()->setCFIState(0); + BBs.back()->setOffset(BinaryBasicBlock::INVALID_OFFSET); + + Func->insertBasicBlocks(nullptr, std::move(BBs), + /*UpdateLayout=*/true, + /*UpdateCFIState=*/false); + Func->updateState(BinaryFunction::State::CFG_Finalized); + return Func; + }; + + const BinaryFunction *const Start = + BC.getBinaryFunctionAtAddress(*BC.StartFunctionAddress); + assert(Start && "Entry point function not found"); + const MCSymbol *StartSym = Start->getSymbol(); + createSimpleFunction("__bolt_hugify_start_program", + BC.MIB->createSymbolTrampoline(StartSym, BC.Ctx.get())); +} +} // namespace bolt +} // namespace llvm \ No newline at end of file 
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 85b49a29c8079..0b17dd54ece58 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -13,6 +13,7 @@ #include "bolt/Passes/AsmDump.h" #include "bolt/Passes/CMOVConversion.h" #include "bolt/Passes/FrameOptimizer.h" +#include "bolt/Passes/Hugify.h" #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/IndirectCallPromotion.h" #include "bolt/Passes/Inliner.h" @@ -333,6 +334,8 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { if (opts::Instrument) Manager.registerPass(std::make_unique(NeverPrint)); + else if (opts::Hugify) + Manager.registerPass(std::make_unique(NeverPrint)); Manager.registerPass(std::make_unique(NeverPrint)); diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 6ea4ba603698c..b0403da636011 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -479,6 +479,11 @@ Error RewriteInstance::discoverStorage() { NextAvailableAddress = alignTo(NextAvailableAddress, BC->PageAlign); NextAvailableOffset = alignTo(NextAvailableOffset, BC->PageAlign); + // Hugify: Additional huge page from left side due to + // weird ASLR mapping addresses (4KB aligned) + if (opts::Hugify && !BC->HasFixedLoadAddress) + NextAvailableAddress += BC->PageAlign; + if (!opts::UseGnuStack) { // This is where the black magic happens. Creating PHDR table in a segment // other than that containing ELF header is tricky. 
Some loaders and/or @@ -3719,6 +3724,12 @@ void RewriteInstance::mapCodeSections(RuntimeDyld &RTDyld) { Address = alignTo(Address, Section->getAlignment()); Section->setOutputAddress(Address); Address += Section->getOutputSize(); + + // Hugify: Additional huge page from right side due to + // weird ASLR mapping addresses (4KB aligned) + if (opts::Hugify && !BC->HasFixedLoadAddress && + Section->getName() == BC->getMainCodeSectionName()) + Address = alignTo(Address, Section->getAlignment()); } // Make sure we allocate enough space for huge pages. diff --git a/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp index 9a4a1f7239e54..802bb0d1fe914 100644 --- a/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp +++ b/bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp @@ -60,35 +60,6 @@ void HugifyRuntimeLibrary::adjustCommandLineOptions( } } -void HugifyRuntimeLibrary::emitBinary(BinaryContext &BC, MCStreamer &Streamer) { - const BinaryFunction *StartFunction = - BC.getBinaryFunctionAtAddress(*(BC.StartFunctionAddress)); - assert(!StartFunction->isFragment() && "expected main function fragment"); - if (!StartFunction) { - errs() << "BOLT-ERROR: failed to locate function at binary start address\n"; - exit(1); - } - - const auto Flags = BinarySection::getFlags(/*IsReadOnly=*/false, - /*IsText=*/false, - /*IsAllocatable=*/true); - MCSectionELF *Section = - BC.Ctx->getELFSection(".bolt.hugify.entries", ELF::SHT_PROGBITS, Flags); - - // __bolt_hugify_init_ptr stores the poiter the hugify library needs to - // jump to after finishing the init code. 
- MCSymbol *InitPtr = BC.Ctx->getOrCreateSymbol("__bolt_hugify_init_ptr"); - - Section->setAlignment(llvm::Align(BC.RegularPageSize)); - Streamer.switchSection(Section); - - Streamer.emitLabel(InitPtr); - Streamer.emitSymbolAttribute(InitPtr, MCSymbolAttr::MCSA_Global); - Streamer.emitValue( - MCSymbolRefExpr::create(StartFunction->getSymbol(), *(BC.Ctx)), - /*Size=*/8); -} - void HugifyRuntimeLibrary::link(BinaryContext &BC, StringRef ToolPath, RuntimeDyld &RTDyld, std::function OnLoad) { diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt index f3cea8d2dc08c..c685819aec821 100644 --- a/bolt/runtime/CMakeLists.txt +++ b/bolt/runtime/CMakeLists.txt @@ -27,10 +27,11 @@ set(BOLT_RT_FLAGS -fno-exceptions -fno-rtti -fno-stack-protector - -mno-sse) + -mno-sse + -fPIE) # Don't let the compiler think it can create calls to standard libs -target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS} -fPIE) +target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS}) target_include_directories(bolt_rt_instr PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_compile_options(bolt_rt_hugify PRIVATE ${BOLT_RT_FLAGS}) target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h index 008dbb6c3de86..58b8114713d69 100644 --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -283,6 +283,22 @@ uint32_t strLen(const char *Str) { return Size; } +/// Find \p Needle in \p Haystack; nullptr if absent or \p Needle is empty. +void *strStr(const char *const Haystack, const char *const Needle) { + const uint32_t HaystackLen = strLen(Haystack); + const uint32_t NeedleLen = strLen(Needle); + for (uint32_t I = 0; I < HaystackLen; I++) { + if (Haystack[I] != Needle[0]) + continue; + uint32_t J = 1; + while (J < NeedleLen && Haystack[I + J] == Needle[J]) + ++J; + if (J == NeedleLen) + return (void *)&Haystack[I]; + } + return nullptr; +} + void reportNumber(const char *Msg, uint64_t Num, uint32_t Base) { char Buf[BufSize]; char *Ptr = Buf; @@ -310,6 +326,25 @@ unsigned long hexToLong(const char *Str, char Terminator = '\0') { return Res; } +/// 
Starting from character at \p Buf, find the longest consecutive sequence +/// of digits (0-9) and convert it to uint32_t. The converted value +/// is put into \p Ret. \p End marks the end of the buffer to avoid buffer +/// overflow. The function \returns whether a valid uint32_t value is found. +/// \p Buf will be updated to the next character right after the digits. +static bool scanUInt32(const char *&Buf, const char *End, uint32_t &Ret) { + uint64_t Result = 0; + const char *OldBuf = Buf; + // Stop accumulating once the value exceeds the uint32_t range, so that very + // long digit runs cannot wrap Result around and appear valid. + for (; Buf < End && *Buf >= '0' && *Buf <= '9'; ++Buf) + if (Result <= 0xFFFFFFFFu) + Result = Result * 10 + (*Buf) - '0'; + if (OldBuf == Buf || Result > 0xFFFFFFFFu) + return false; + Ret = static_cast<uint32_t>(Result); + return true; +} + #if !defined(__APPLE__) // We use a stack-allocated buffer for string manipulation in many pieces of // this code, including the code that prints each line of the fdata file. This @@ -387,6 +422,28 @@ int __madvise(void *addr, size_t length, int advice) { return ret; } +#define _UTSNAME_LENGTH 65 + +struct UtsNameTy { + char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */ + char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined + network" */ + char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */ + char version[_UTSNAME_LENGTH]; /* Operating system version */ + char machine[_UTSNAME_LENGTH]; /* Hardware identifier */ + char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */ +}; + +int __uname(struct UtsNameTy *Buf) { + int Ret; + __asm__ __volatile__("movq $63, %%rax\n" + "syscall\n" + : "=a"(Ret) + : "D"(Buf) + : "cc", "rcx", "r11", "memory"); + return Ret; +} + struct timespec { uint64_t tv_sec; /* seconds */ uint64_t tv_nsec; /* nanoseconds */ @@ -482,6 +539,23 @@ int __fsync(int fd) { return ret; } +// %rdi %rsi %rdx %r10 %r8 +// sys_prctl int option unsigned unsigned unsigned unsigned +// long arg2 long arg3 long arg4 long arg5 +int __prctl(int Option, unsigned long Arg2, 
unsigned long Arg3, + unsigned long Arg4, unsigned long Arg5) { + int Ret; + register long rdx asm("rdx") = Arg3; + register long r8 asm("r8") = Arg5; + register long r10 asm("r10") = Arg4; + __asm__ __volatile__("movq $157, %%rax\n" + "syscall\n" + : "=a"(Ret) + : "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8) + : "cc", "rcx", "r11", "memory"); // syscall overwrites rcx and r11 + return Ret; +} + #endif void reportError(const char *Msg, uint64_t Size) { diff --git a/bolt/runtime/hugify.cpp b/bolt/runtime/hugify.cpp index 69e1a7e0694a8..05c1be4f2d70c 100644 --- a/bolt/runtime/hugify.cpp +++ b/bolt/runtime/hugify.cpp @@ -1,129 +1,177 @@ -//===- bolt/runtime/hugify.cpp --------------------------------------------===// +//===- bolt/runtime/hugify.cpp -------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===----------------------------------------------------------------------===// +//===---------------------------------------------------------------------===// -#if defined (__x86_64__) -#if !defined(__APPLE__) +#if defined (__x86_64__) && !defined(__APPLE__) #include "common.h" -#include + +#pragma GCC visibility push(hidden) // Enables a very verbose logging to stderr useful when debugging -//#define ENABLE_DEBUG +// #define ENABLE_DEBUG + +#ifdef ENABLE_DEBUG +#define DEBUG(X) \ + { X; } +#else +#define DEBUG(X) \ + {} +#endif -// Function pointers to init routines in the binary, so we can resume -// regular execution of the function that we hooked. -extern void (*__bolt_hugify_init_ptr)(); +// Function contains trampoline to _start, +// so we can resume regular execution of the function that we hooked. +extern void __bolt_hugify_start_program(); // The __hot_start and __hot_end symbols set by Bolt. We use them to figure // out the rage for marking huge pages. 
extern uint64_t __hot_start; extern uint64_t __hot_end; -#ifdef MADV_HUGEPAGE +static void getKernelVersion(uint32_t *Val) { + // Parse uname's release string ("%d.%d.%d": major, minor, release) into + // Val[0..2]; entries that could not be parsed are left untouched. + struct UtsNameTy UtsName; + if (__uname(&UtsName) != 0) + return; + const char *Buf = UtsName.release; + const char *End = Buf + strLen(Buf); + const char Delims[2][2] = {".", "."}; + + for (int i = 0; i < 3; ++i) { + if (!scanUInt32(Buf, End, Val[i])) + return; + if (i < sizeof(Delims) / sizeof(Delims[0])) { + const char *Ptr = Delims[i]; + while (*Ptr != '\0') { + if (*Ptr != *Buf) { + return; + } + ++Ptr; + ++Buf; + } + } + } +} + /// Check whether the kernel supports THP via corresponding sysfs entry. -static bool has_pagecache_thp_support() { - char buf[256] = {0}; - const char *madviseStr = "always [madvise] never"; +/// thp works only starting from 5.10 +static bool hasPagecacheTHPSupport() { + char Buf[64]; - int fd = __open("/sys/kernel/mm/transparent_hugepage/enabled", + int FD = __open("/sys/kernel/mm/transparent_hugepage/enabled", 0 /* O_RDONLY */, 0); - if (fd < 0) + if (FD < 0) + return false; + + memset(Buf, 0, sizeof(Buf)); + const long Res = __read(FD, Buf, sizeof(Buf) - 1); // signed; keeps a NUL + if (Res < 0) return false; - size_t res = __read(fd, buf, 256); - if (res < 0) + if (!strStr(Buf, "[always]") && !strStr(Buf, "[madvise]")) return false; - int cmp = strnCmp(buf, madviseStr, strLen(madviseStr)); - return cmp == 0; + struct KernelVersionTy { + uint32_t major; + uint32_t minor; + uint32_t release; + }; + + KernelVersionTy KernelVersion = {0, 0, 0}; + + getKernelVersion((uint32_t *)&KernelVersion); + // Accept 5.10 and anything newer; comparing the components independently + // would wrongly reject e.g. 6.0. + return KernelVersion.major > 5 || + (KernelVersion.major == 5 && KernelVersion.minor >= 10); } -static void hugify_for_old_kernel(uint8_t *from, uint8_t *to) { - size_t size = to - from; +static void hugifyForOldKernel(uint8_t *From, uint8_t *To) { + const size_t Size = To - From; - uint8_t *mem = reinterpret_cast( - __mmap(0, size, 0x3 /* PROT_READ | PROT_WRITE*/, - 0x22 /* MAP_PRIVATE 
| MAP_ANONYMOUS*/, -1, 0)); + uint8_t *Mem = reinterpret_cast( + __mmap(0, Size, 0x3 /* PROT_READ | PROT_WRITE */, + 0x22 /* MAP_PRIVATE | MAP_ANONYMOUS */, -1, 0)); - if (mem == (void *)MAP_FAILED) { - char msg[] = "Could not allocate memory for text move\n"; - reportError(msg, sizeof(msg)); + if (Mem == ((void *)-1) /* MAP_FAILED */) { + char Msg[] = "[hugify] could not allocate memory for text move\n"; + reportError(Msg, sizeof(Msg)); } -#ifdef ENABLE_DEBUG - reportNumber("Allocated temporary space: ", (uint64_t)mem, 16); -#endif - // Copy the hot code to a temproary location. - memcpy(mem, from, size); + DEBUG(reportNumber("[hugify] allocated temporary address: ", (uint64_t)Mem, + 16);) + DEBUG(reportNumber("[hugify] allocated size: ", (uint64_t)Size, 16);) + + // Copy the hot code to a temporary location. + memcpy(Mem, From, Size); + __prctl(41 /* PR_SET_THP_DISABLE */, 0, 0, 0, 0); // Maps out the existing hot code. - if (__mmap(reinterpret_cast(from), size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, - 0) == (void *)MAP_FAILED) { - char msg[] = "failed to mmap memory for large page move terminating\n"; - reportError(msg, sizeof(msg)); + if (__mmap(reinterpret_cast(From), Size, + 0x3 /* PROT_READ | PROT_WRITE */, + 0x32 /* MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE */, -1, + 0) == ((void *)-1) /*MAP_FAILED*/) { + char Msg[] = + "[hugify] failed to mmap memory for large page move terminating\n"; + reportError(Msg, sizeof(Msg)); } // Mark the hot code page to be huge page. - if (__madvise(from, size, MADV_HUGEPAGE) == -1) { - char msg[] = "failed to allocate large page\n"; - reportError(msg, sizeof(msg)); + if (__madvise(From, Size, 14 /* MADV_HUGEPAGE */) == -1) { + char Msg[] = "[hugify] setting MADV_HUGEPAGE is failed\n"; + reportError(Msg, sizeof(Msg)); } // Copy the hot code back. 
- memcpy(from, mem, size); + memcpy(From, Mem, Size); // Change permission back to read-only, ignore failure - __mprotect(from, size, PROT_READ | PROT_EXEC); + __mprotect(From, Size, 0x5 /* PROT_READ | PROT_EXEC */); - __munmap(mem, size); + __munmap(Mem, Size); } -#endif extern "C" void __bolt_hugify_self_impl() { -#ifdef MADV_HUGEPAGE - uint8_t *hotStart = (uint8_t *)&__hot_start; - uint8_t *hotEnd = (uint8_t *)&__hot_end; + uint8_t *HotStart = (uint8_t *)&__hot_start; + uint8_t *HotEnd = (uint8_t *)&__hot_end; // Make sure the start and end are aligned with huge page address - const size_t hugePageBytes = 2L * 1024 * 1024; - uint8_t *from = hotStart - ((intptr_t)hotStart & (hugePageBytes - 1)); - uint8_t *to = hotEnd + (hugePageBytes - 1); - to -= (intptr_t)to & (hugePageBytes - 1); - -#ifdef ENABLE_DEBUG - reportNumber("[hugify] hot start: ", (uint64_t)hotStart, 16); - reportNumber("[hugify] hot end: ", (uint64_t)hotEnd, 16); - reportNumber("[hugify] aligned huge page from: ", (uint64_t)from, 16); - reportNumber("[hugify] aligned huge page to: ", (uint64_t)to, 16); -#endif - - if (!has_pagecache_thp_support()) { - hugify_for_old_kernel(from, to); + const size_t HugePageBytes = 2L * 1024 * 1024; + uint8_t *From = HotStart - ((intptr_t)HotStart & (HugePageBytes - 1)); + uint8_t *To = HotEnd + (HugePageBytes - 1); + To -= (intptr_t)To & (HugePageBytes - 1); + + DEBUG(reportNumber("[hugify] hot start: ", (uint64_t)HotStart, 16);) + DEBUG(reportNumber("[hugify] hot end: ", (uint64_t)HotEnd, 16);) + DEBUG(reportNumber("[hugify] aligned huge page from: ", (uint64_t)From, 16);) + DEBUG(reportNumber("[hugify] aligned huge page to: ", (uint64_t)To, 16);) + + if (!hasPagecacheTHPSupport()) { + DEBUG(report( + "[hugify] workaround with memory alignment for kernel < 5.10\n");) + hugifyForOldKernel(From, To); return; } - if (__madvise(from, (to - from), MADV_HUGEPAGE) == -1) { - char msg[] = "failed to allocate large page\n"; + if (__madvise(From, (To - From), 14 /* 
MADV_HUGEPAGE */) == -1) { + char Msg[] = "[hugify] failed to allocate large page\n"; // TODO: allow user to control the failure behavior. - reportError(msg, sizeof(msg)); + reportError(Msg, sizeof(Msg)); } -#endif } /// This is hooking ELF's entry, it needs to save all machine state. extern "C" __attribute((naked)) void __bolt_hugify_self() { - __asm__ __volatile__(SAVE_ALL - "call __bolt_hugify_self_impl\n" - RESTORE_ALL - "jmp *__bolt_hugify_init_ptr(%%rip)\n" - :::); -} - +#if defined(__x86_64__) + __asm__ __volatile__(SAVE_ALL "call __bolt_hugify_self_impl\n" RESTORE_ALL + "jmp __bolt_hugify_start_program\n" :: + :); +#else + exit(1); #endif +} #endif diff --git a/bolt/test/X86/Inputs/jt-pic-linkerscript.ld b/bolt/test/X86/Inputs/jt-pic-linkerscript.ld new file mode 100644 index 0000000000000..c32ffd695682c --- /dev/null +++ b/bolt/test/X86/Inputs/jt-pic-linkerscript.ld @@ -0,0 +1,10 @@ +# Linker script used by jump-table-pic-conflict.s test. +# .rodata needs to appear before .text + +SECTIONS +{ + . = 0x201120; + .rodata : { *(.rodata) } + .eh_frame : { *(.eh_frame) } + .text : { *(.text) } +} diff --git a/bolt/test/X86/jump-table-pic-conflict.s b/bolt/test/X86/jump-table-pic-conflict.s new file mode 100644 index 0000000000000..ed3c77d49b6cc --- /dev/null +++ b/bolt/test/X86/jump-table-pic-conflict.s @@ -0,0 +1,132 @@ +# Check cases when the first PIC jump table entries of one function can be +# interpreted as valid last entries of the previous function. + +# Conditions to trigger the bug: Function A and B have jump tables that +# are adjacent in memory. We run in lite relocation mode. Function B +# is not disassembled because it does not have profile. Function A +# triggers a special conditional that forced BOLT to rewrite its jump +# table in-place (instead of moving it) because it is marked as +# non-simple (in this case, containing unknown control flow). 
The +# first entry of B's jump table (a PIC offset) happens to be a valid +# address inside A when added to A's jump table base address. In this +# case, BOLT could overwrite B's jump table, corrupting it, thinking +# the first entry of it is actually part of A's jump table. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \ +# RUN: %s -o %t.o +# RUN: link_fdata %s %t.o %t.fdata +# RUN: llvm-strip --strip-unneeded %t.o +# RUN: ld.lld %t.o -o %t.exe -q -T %S/Inputs/jt-pic-linkerscript.ld +# RUN: llvm-bolt %t.exe -relocs -o %t.out -data %t.fdata \ +# RUN: -lite=1 +# RUN: llvm-readelf -S %t.out | FileCheck --check-prefix=CHECK %s +# The output binary is runnable, but we check for test success with +# readelf. This is another way to check this bug: +# COM: %t.out + +# BOLT needs to create a new rodata section, indicating that it +# successfully moved the jump table in _start. +# CHECK: [{{.*}}] .bolt.org.rodata + + .globl _start + .type _start, %function +_start: + .cfi_startproc +# FDATA: 0 [unknown] 0 1 _start 0 0 1 + push %rbp + mov %rsp, %rbp + mov 0x8(%rbp), %rdi + cmpq $3, %rdi + ja .L5 + jmp .L6 +# Unreachable code, here to mark this function as non-simple +# (containing unknown control flow) with a stray indirect jmp + jmp *%rax +.L6: + decq %rdi + leaq .LJT1(%rip), %rcx + movslq (%rcx, %rdi, 4), %rax + addq %rcx, %rax + jmp *%rax +.L1: + leaq str1(%rip), %rsi + jmp .L4 +.L2: + leaq str2(%rip), %rsi + jmp .L4 +.L3: + leaq str3(%rip), %rsi + jmp .L4 +.L5: + leaq str4(%rip), %rsi +.L4: + movq $1, %rdi + movq $10, %rdx + movq $1, %rax + syscall + mov 0x8(%rbp), %rdi + decq %rdi + callq func_b + movq %rax, %rdi + movq $231, %rax + syscall + pop %rbp + ret + .cfi_endproc + .size _start, .-_start + + .globl func_b + .type func_b, %function +func_b: + .cfi_startproc + push %rbp + mov %rsp, %rbp + cmpq $3, %rdi + ja .L2_6 +# FT + leaq .LJT2(%rip), %rcx + movslq (%rcx, %rdi, 4), %rax + addq %rcx, %rax + jmp *%rax +.L2_1: + movq 
$0, %rax + jmp .L2_5 +.L2_2: + movq $1, %rax + jmp .L2_5 +.L2_3: + movq $2, %rax + jmp .L2_5 +.L2_4: + movq $3, %rax + jmp .L2_5 +.L2_6: + movq $-1, %rax +.L2_5: + popq %rbp + ret + .cfi_endproc + .size func_b, .-func_b + + .rodata +str1: .asciz "Message 1\n" +str2: .asciz "Message 2\n" +str3: .asciz "Message 3\n" +str4: .asciz "Highrange\n" +# Special case where the first .LJT2 entry is a valid offset of +# _start when interpreted with .LJT1 as a base address. +.LJT1: + .long .L1-.LJT1 + .long .L2-.LJT1 + .long .L3-.LJT1 + .long .L3-.LJT1 + .long .L3-.LJT1 + .long .L3-.LJT1 + .long .L3-.LJT1 +.LJT2: + .long .L2_1-.LJT2 + .long .L2_2-.LJT2 + .long .L2_3-.LJT2 + .long .L2_4-.LJT2 diff --git a/bolt/test/runtime/X86/hugify.c b/bolt/test/runtime/X86/hugify.c new file mode 100644 index 0000000000000..cfc0cb62652b9 --- /dev/null +++ b/bolt/test/runtime/X86/hugify.c @@ -0,0 +1,27 @@ +// Make sure BOLT correctly processes --hugify option + +#include + +int main(int argc, char **argv) { + printf("Hello world\n"); + return 0; +} + +/* +REQUIRES: system-linux,bolt-runtime + +RUN: %clang %cflags -no-pie %s -o %t.nopie.exe -Wl,-q +RUN: %clang %cflags -fpic -pie %s -o %t.pie.exe -Wl,-q + +RUN: llvm-bolt %t.nopie.exe --lite=0 -o %t.nopie --hugify +RUN: llvm-bolt %t.pie.exe --lite=0 -o %t.pie --hugify + +RUN: %t.nopie | FileCheck %s -check-prefix=CHECK-NOPIE + +CHECK-NOPIE: Hello world + +RUN: %t.pie | FileCheck %s -check-prefix=CHECK-PIE + +CHECK-PIE: Hello world + +*/ diff --git a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp index 67d8ccbd6cad4..f089abf69dce6 100644 --- a/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp +++ b/clang-tools-extra/clang-tidy/tool/ClangTidyMain.cpp @@ -519,9 +519,9 @@ int clangTidyMain(int argc, const char **argv) { std::vector RawOptions = OptionsProvider->getRawOptions(FilePath); for (const std::string &Check : EnabledChecks) { - for (auto It = RawOptions.rbegin(); It != 
RawOptions.rend(); ++It) { - if (It->first.Checks && GlobList(*It->first.Checks).contains(Check)) { - llvm::outs() << "'" << Check << "' is enabled in the " << It->second + for (const auto &[Opts, Source] : llvm::reverse(RawOptions)) { + if (Opts.Checks && GlobList(*Opts.Checks).contains(Check)) { + llvm::outs() << "'" << Check << "' is enabled in the " << Source << ".\n"; break; } @@ -557,20 +557,16 @@ int clangTidyMain(int argc, const char **argv) { NamesAndOptions Valid = getAllChecksAndOptions(AllowEnablingAnalyzerAlphaCheckers); bool AnyInvalid = false; - for (const std::pair &OptionWithSource : - RawOptions) { - const ClangTidyOptions &Opts = OptionWithSource.first; + for (const auto &[Opts, Source] : RawOptions) { if (Opts.Checks) - AnyInvalid |= - verifyChecks(Valid.Names, *Opts.Checks, OptionWithSource.second); + AnyInvalid |= verifyChecks(Valid.Names, *Opts.Checks, Source); for (auto Key : Opts.CheckOptions.keys()) { if (Valid.Options.contains(Key)) continue; AnyInvalid = true; - auto &Output = - llvm::WithColor::warning(llvm::errs(), OptionWithSource.second) - << "unknown check option '" << Key << '\''; + auto &Output = llvm::WithColor::warning(llvm::errs(), Source) + << "unknown check option '" << Key << '\''; llvm::StringRef Closest = closest(Key, Valid.Options); if (!Closest.empty()) Output << "; did you mean '" << Closest << '\''; diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index df8ad666e2da0..dec115a64a59e 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -97,7 +97,7 @@ add_clang_library(clangDaemon SemanticHighlighting.cpp SemanticSelection.cpp SourceCode.cpp - QueryDriverDatabase.cpp + SystemIncludeExtractor.cpp TidyProvider.cpp TUScheduler.cpp URI.cpp diff --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp index 8b94d0d84fd4c..01cd178c5b35c 100644 --- 
a/clang-tools-extra/clangd/ClangdLSPServer.cpp +++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp @@ -502,14 +502,14 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, CDBOpts.ContextProvider = Opts.ContextProvider; BaseCDB = std::make_unique(CDBOpts); - BaseCDB = getQueryDriverDatabase(llvm::makeArrayRef(Opts.QueryDriverGlobs), - std::move(BaseCDB)); } auto Mangler = CommandMangler::detect(); + Mangler.SystemIncludeExtractor = + getSystemIncludeExtractor(llvm::makeArrayRef(Opts.QueryDriverGlobs)); if (Opts.ResourceDir) Mangler.ResourceDir = *Opts.ResourceDir; CDB.emplace(BaseCDB.get(), Params.initializationOptions.fallbackFlags, - tooling::ArgumentsAdjuster(std::move(Mangler))); + std::move(Mangler)); { // Switch caller's context with LSPServer's background context. Since we // rather want to propagate information from LSPServer's context into the @@ -1815,5 +1815,6 @@ void ClangdLSPServer::onSemanticsMaybeChanged(PathRef File) { }); } } + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index 5612fc599fb50..e52cb2643babd 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -2123,6 +2123,9 @@ bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) { }; return false; }; + auto InClassScope = [](const NamedDecl &ND) { + return ND.getDeclContext()->getDeclKind() == Decl::CXXRecord; + }; // We only complete symbol's name, which is the same as the name of the // *primary* template in case of template specializations. if (isExplicitTemplateSpecialization(&ND)) @@ -2138,8 +2141,11 @@ bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) { if (InTopLevelScope(ND)) return true; + // Always index enum constants, even if they're not in the top level scope: + // when + // --all-scopes-completion is set, we'll want to complete those as well. 
if (const auto *EnumDecl = dyn_cast(ND.getDeclContext())) - return InTopLevelScope(*EnumDecl) && !EnumDecl->isScoped(); + return (InTopLevelScope(*EnumDecl) || InClassScope(*EnumDecl)); return false; } diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h index 269be8944df17..19ef4c17d3b0f 100644 --- a/clang-tools-extra/clangd/CodeComplete.h +++ b/clang-tools-extra/clangd/CodeComplete.h @@ -291,7 +291,7 @@ SignatureHelp signatureHelp(PathRef FileName, Position Pos, // For index-based completion, we only consider: // * symbols in namespaces or translation unit scopes (e.g. no class // members, no locals) -// * enum constants in unscoped enum decl (e.g. "red" in "enum {red};") +// * enum constants (both scoped and unscoped) // * primary templates (no specializations) // For the other cases, we let Clang do the completion because it does not // need any non-local information and it will be much better at following diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 02acc92265ffa..e84eb0aa30328 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -13,7 +13,6 @@ #include "clang/Driver/Driver.h" #include "clang/Driver/Options.h" #include "clang/Frontend/CompilerInvocation.h" -#include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -195,8 +194,9 @@ CommandMangler CommandMangler::detect() { CommandMangler CommandMangler::forTests() { return CommandMangler(); } -void CommandMangler::adjust(std::vector &Cmd, - llvm::StringRef File) const { +void CommandMangler::operator()(tooling::CompileCommand &Command, + llvm::StringRef File) const { + std::vector &Cmd = Command.CommandLine; trace::Span S("AdjustCompileFlags"); // Most of the modifications below assumes the Cmd starts with a driver name. 
// We might consider injecting a generic driver name like "cc" or "c++", but @@ -301,6 +301,17 @@ void CommandMangler::adjust(std::vector &Cmd, for (auto &Edit : Config::current().CompileFlags.Edits) Edit(Cmd); + // The system include extractor needs to run: + // - AFTER transferCompileCommand(), because the -x flag it adds may be + // necessary for the system include extractor to identify the file type + // - AFTER applying CompileFlags.Edits, because the name of the compiler + // that needs to be invoked may come from the CompileFlags->Compiler key + // - BEFORE resolveDriver() because that can mess up the driver path, + // e.g. changing gcc to /path/to/clang/bin/gcc + if (SystemIncludeExtractor) { + SystemIncludeExtractor(Command, File); + } + // Check whether the flag exists, either as -flag or -flag=* auto Has = [&](llvm::StringRef Flag) { for (llvm::StringRef Arg : Cmd) { @@ -340,16 +351,6 @@ void CommandMangler::adjust(std::vector &Cmd, } } -CommandMangler::operator clang::tooling::ArgumentsAdjuster() && { - // ArgumentsAdjuster is a std::function and so must be copyable. 
- return [Mangler = std::make_shared(std::move(*this))]( - const std::vector &Args, llvm::StringRef File) { - auto Result = Args; - Mangler->adjust(Result, File); - return Result; - }; -} - // ArgStripper implementation namespace { diff --git a/clang-tools-extra/clangd/CompileCommands.h b/clang-tools-extra/clangd/CompileCommands.h index 1cf30b7ae55d7..3cf41afd4ccf1 100644 --- a/clang-tools-extra/clangd/CompileCommands.h +++ b/clang-tools-extra/clangd/CompileCommands.h @@ -8,8 +8,8 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPILECOMMANDS_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_COMPILECOMMANDS_H +#include "GlobalCompilationDatabase.h" #include "support/Threading.h" -#include "clang/Tooling/ArgumentsAdjusters.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include @@ -32,6 +32,7 @@ struct CommandMangler { llvm::Optional ResourceDir; // Root for searching for standard library (passed to -isysroot). llvm::Optional Sysroot; + SystemIncludeExtractorFn SystemIncludeExtractor; // A command-mangler that doesn't know anything about the system. // This is hermetic for unit-tests, but won't work well in production. @@ -42,11 +43,14 @@ struct CommandMangler { // - on mac, find clang and isysroot by querying the `xcrun` launcher static CommandMangler detect(); - void adjust(std::vector &Cmd, llvm::StringRef File) const; - explicit operator clang::tooling::ArgumentsAdjuster() &&; + // `Cmd` may describe compilation of a different file, and will be updated + // for parsing `TargetFile`. 
+ void operator()(tooling::CompileCommand &Cmd, + llvm::StringRef TargetFile) const; private: CommandMangler() = default; + Memoize> ResolvedDrivers; Memoize> ResolvedDriversNoFollow; }; diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp index 824a7027b4d89..c1c4897430d9d 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -740,8 +740,8 @@ DirectoryBasedGlobalCompilationDatabase::getProjectInfo(PathRef File) const { OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base, std::vector FallbackFlags, - tooling::ArgumentsAdjuster Adjuster) - : DelegatingCDB(Base), ArgsAdjuster(std::move(Adjuster)), + CommandMangler Mangler) + : DelegatingCDB(Base), Mangler(std::move(Mangler)), FallbackFlags(std::move(FallbackFlags)) {} llvm::Optional @@ -757,8 +757,8 @@ OverlayCDB::getCompileCommand(PathRef File) const { Cmd = DelegatingCDB::getCompileCommand(File); if (!Cmd) return llvm::None; - if (ArgsAdjuster) - Cmd->CommandLine = ArgsAdjuster(Cmd->CommandLine, File); + if (Mangler) + Mangler(*Cmd, File); return Cmd; } @@ -767,8 +767,8 @@ tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const { std::lock_guard Lock(Mutex); Cmd.CommandLine.insert(Cmd.CommandLine.end(), FallbackFlags.begin(), FallbackFlags.end()); - if (ArgsAdjuster) - Cmd.CommandLine = ArgsAdjuster(Cmd.CommandLine, File); + if (Mangler) + Mangler(Cmd, File); return Cmd; } diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h index e71e4368f06b3..c0d751f82f9bb 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h @@ -15,6 +15,7 @@ #include "support/ThreadsafeFS.h" #include "clang/Tooling/ArgumentsAdjusters.h" #include "clang/Tooling/CompilationDatabase.h" +#include "llvm/ADT/FunctionExtras.h" #include 
"llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include @@ -161,22 +162,28 @@ class DirectoryBasedGlobalCompilationDatabase }; /// Extracts system include search path from drivers matching QueryDriverGlobs -/// and adds them to the compile flags. Base may not be nullptr. -/// Returns Base when \p QueryDriverGlobs is empty. -std::unique_ptr -getQueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, - std::unique_ptr Base); +/// and adds them to the compile flags. +/// Returns null when \p QueryDriverGlobs is empty. +using SystemIncludeExtractorFn = llvm::unique_function; +SystemIncludeExtractorFn +getSystemIncludeExtractor(llvm::ArrayRef QueryDriverGlobs); /// Wraps another compilation database, and supports overriding the commands /// using an in-memory mapping. class OverlayCDB : public DelegatingCDB { public: + // Makes adjustments to a tooling::CompileCommand which will be used to + // process a file (possibly different from the one in the command). + using CommandMangler = llvm::unique_function; + // Base may be null, in which case no entries are inherited. // FallbackFlags are added to the fallback compile command. // Adjuster is applied to all commands, fallback or not. 
OverlayCDB(const GlobalCompilationDatabase *Base, std::vector FallbackFlags = {}, - tooling::ArgumentsAdjuster Adjuster = nullptr); + CommandMangler Mangler = nullptr); llvm::Optional getCompileCommand(PathRef File) const override; @@ -190,7 +197,7 @@ class OverlayCDB : public DelegatingCDB { private: mutable std::mutex Mutex; llvm::StringMap Commands; /* GUARDED_BY(Mut) */ - tooling::ArgumentsAdjuster ArgsAdjuster; + CommandMangler Mangler; std::vector FallbackFlags; }; diff --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp index af3a3e6f8e941..dd9392b029df8 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.cpp +++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp @@ -597,19 +597,27 @@ class CollectExtraHighlightings if (!Arg) return; - // Is this parameter passed by non-const reference? + // Is this parameter passed by non-const pointer or reference? // FIXME The condition T->idDependentType() could be relaxed a bit, // e.g. std::vector& is dependent but we would want to highlight it - if (!T->isLValueReferenceType() || - T.getNonReferenceType().isConstQualified() || T->isDependentType()) { + bool IsRef = T->isLValueReferenceType(); + bool IsPtr = T->isPointerType(); + if ((!IsRef && !IsPtr) || T->getPointeeType().isConstQualified() || + T->isDependentType()) { return; } llvm::Optional Location; - // FIXME Add "unwrapping" for ArraySubscriptExpr and UnaryOperator, + // FIXME Add "unwrapping" for ArraySubscriptExpr, // e.g. 
highlight `a` in `a[i]` // FIXME Handle dependent expression types + if (auto *IC = dyn_cast(Arg)) + Arg = IC->getSubExprAsWritten(); + if (auto *UO = dyn_cast(Arg)) { + if (UO->getOpcode() == UO_AddrOf) + Arg = UO->getSubExpr(); + } if (auto *DR = dyn_cast(Arg)) Location = DR->getLocation(); else if (auto *M = dyn_cast(Arg)) @@ -617,7 +625,8 @@ class CollectExtraHighlightings if (Location) H.addExtraModifier(*Location, - HighlightingModifier::UsedAsMutableReference); + IsRef ? HighlightingModifier::UsedAsMutableReference + : HighlightingModifier::UsedAsMutablePointer); } void @@ -1140,6 +1149,8 @@ llvm::StringRef toSemanticTokenModifier(HighlightingModifier Modifier) { return "defaultLibrary"; case HighlightingModifier::UsedAsMutableReference: return "usedAsMutableReference"; // nonstandard + case HighlightingModifier::UsedAsMutablePointer: + return "usedAsMutablePointer"; // nonstandard case HighlightingModifier::ConstructorOrDestructor: return "constructorOrDestructor"; // nonstandard case HighlightingModifier::FunctionScope: diff --git a/clang-tools-extra/clangd/SemanticHighlighting.h b/clang-tools-extra/clangd/SemanticHighlighting.h index 79ecb344275d1..64ad431909faa 100644 --- a/clang-tools-extra/clangd/SemanticHighlighting.h +++ b/clang-tools-extra/clangd/SemanticHighlighting.h @@ -71,6 +71,7 @@ enum class HighlightingModifier { DependentName, DefaultLibrary, UsedAsMutableReference, + UsedAsMutablePointer, ConstructorOrDestructor, FunctionScope, diff --git a/clang-tools-extra/clangd/QueryDriverDatabase.cpp b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp similarity index 89% rename from clang-tools-extra/clangd/QueryDriverDatabase.cpp rename to clang-tools-extra/clangd/SystemIncludeExtractor.cpp index c36fb4f042a9f..7cfbd3dbf7318 100644 --- a/clang-tools-extra/clangd/QueryDriverDatabase.cpp +++ b/clang-tools-extra/clangd/SystemIncludeExtractor.cpp @@ -1,4 +1,4 @@ -//===--- QueryDriverDatabase.cpp ---------------------------------*- C++-*-===// +//===--- 
SystemIncludeExtractor.cpp ------------------------------*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -315,24 +315,20 @@ llvm::Regex convertGlobsToRegex(llvm::ArrayRef Globs) { /// Extracts system includes from a trusted driver by parsing the output of /// include search path and appends them to the commands coming from underlying /// compilation database. -class QueryDriverDatabase : public DelegatingCDB { +class SystemIncludeExtractor { public: - QueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, - std::unique_ptr Base) - : DelegatingCDB(std::move(Base)), - QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)) {} + SystemIncludeExtractor(llvm::ArrayRef QueryDriverGlobs) + : QueryDriverRegex(convertGlobsToRegex(QueryDriverGlobs)) {} - llvm::Optional - getCompileCommand(PathRef File) const override { - auto Cmd = DelegatingCDB::getCompileCommand(File); - if (!Cmd || Cmd->CommandLine.empty()) - return Cmd; + void operator()(tooling::CompileCommand &Cmd, llvm::StringRef File) const { + if (Cmd.CommandLine.empty()) + return; llvm::StringRef Lang; - for (size_t I = 0, E = Cmd->CommandLine.size(); I < E; ++I) { - llvm::StringRef Arg = Cmd->CommandLine[I]; + for (size_t I = 0, E = Cmd.CommandLine.size(); I < E; ++I) { + llvm::StringRef Arg = Cmd.CommandLine[I]; if (Arg == "-x" && I + 1 < E) - Lang = Cmd->CommandLine[I + 1]; + Lang = Cmd.CommandLine[I + 1]; else if (Arg.startswith("-x")) Lang = Arg.drop_front(2).trim(); } @@ -341,26 +337,25 @@ class QueryDriverDatabase : public DelegatingCDB { auto Type = driver::types::lookupTypeForExtension(Ext); if (Type == driver::types::TY_INVALID) { elog("System include extraction: invalid file type for {0}", Ext); - return Cmd; + return; } Lang = driver::types::getTypeName(Type); } - llvm::SmallString<128> Driver(Cmd->CommandLine.front()); + llvm::SmallString<128> Driver(Cmd.CommandLine.front()); if 
(llvm::any_of(Driver, - [](char C) { return llvm::sys::path::is_separator(C); })) + [](char C) { return llvm::sys::path::is_separator(C); })) // Driver is a not a single executable name but instead a path (either // relative or absolute). - llvm::sys::fs::make_absolute(Cmd->Directory, Driver); + llvm::sys::fs::make_absolute(Cmd.Directory, Driver); if (auto Info = QueriedDrivers.get(/*Key=*/(Driver + ":" + Lang).str(), [&] { return extractSystemIncludesAndTarget( - Driver, Lang, Cmd->CommandLine, QueryDriverRegex); + Driver, Lang, Cmd.CommandLine, QueryDriverRegex); })) { - setTarget(addSystemIncludes(*Cmd, Info->SystemIncludes), Info->Target); + setTarget(addSystemIncludes(Cmd, Info->SystemIncludes), Info->Target); } - return Cmd; } private: @@ -370,14 +365,11 @@ class QueryDriverDatabase : public DelegatingCDB { }; } // namespace -std::unique_ptr -getQueryDriverDatabase(llvm::ArrayRef QueryDriverGlobs, - std::unique_ptr Base) { - assert(Base && "Null base to SystemIncludeExtractor"); +SystemIncludeExtractorFn +getSystemIncludeExtractor(llvm::ArrayRef QueryDriverGlobs) { if (QueryDriverGlobs.empty()) - return Base; - return std::make_unique(QueryDriverGlobs, - std::move(Base)); + return nullptr; + return SystemIncludeExtractor(QueryDriverGlobs); } } // namespace clangd diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp index 7393984b984ba..9070582801f21 100644 --- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp +++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp @@ -147,7 +147,14 @@ int main(int argc, const char **argv) { auto Err = Executor->get()->execute( std::make_unique(Data), clang::tooling::ArgumentsAdjuster( - clang::clangd::CommandMangler::detect())); + [Mangler = std::make_shared( + clang::clangd::CommandMangler::detect())]( + const std::vector &Args, llvm::StringRef File) { + clang::tooling::CompileCommand Cmd; + Cmd.CommandLine = Args; + Mangler->operator()(Cmd, File); + return 
Cmd.CommandLine; + })); if (Err) { clang::clangd::elog("{0}", std::move(Err)); } diff --git a/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp b/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp index 8df7a448c4383..93fdbb9486cc7 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp @@ -155,6 +155,13 @@ Expected RemoveUsingNamespace::apply(const Selection &Inputs) { if (!visibleContext(T->getDeclContext()) ->Equals(TargetDirective->getNominatedNamespace())) return; + // Avoid adding qualifiers before operators, e.g. + // using namespace std; + // cout << "foo"; // Must not changed to std::cout std:: << "foo" + // FIXME: User-defined literals are not handled + if (T->isInIdentifierNamespace( + Decl::IdentifierNamespace::IDNS_NonMemberOperator)) + return; } SourceLocation Loc = Ref.NameLoc; if (Loc.isMacroID()) { diff --git a/clang-tools-extra/clangd/test/initialize-params.test b/clang-tools-extra/clangd/test/initialize-params.test index eb958cac20279..a2df61ca75235 100644 --- a/clang-tools-extra/clangd/test/initialize-params.test +++ b/clang-tools-extra/clangd/test/initialize-params.test @@ -68,6 +68,7 @@ # CHECK-NEXT: "dependentName", # CHECK-NEXT: "defaultLibrary", # CHECK-NEXT: "usedAsMutableReference", +# CHECK-NEXT: "usedAsMutablePointer", # CHECK-NEXT: "constructorOrDestructor", # CHECK-NEXT: "functionScope", # CHECK-NEXT: "classScope", diff --git a/clang-tools-extra/clangd/test/semantic-tokens.test b/clang-tools-extra/clangd/test/semantic-tokens.test index 5abe78e9a51e1..b3a92b7cc737b 100644 --- a/clang-tools-extra/clangd/test/semantic-tokens.test +++ b/clang-tools-extra/clangd/test/semantic-tokens.test @@ -23,7 +23,7 @@ # CHECK-NEXT: 4, # CHECK-NEXT: 1, # CHECK-NEXT: 0, -# CHECK-NEXT: 32771 +# CHECK-NEXT: 65539 # CHECK-NEXT: ], # CHECK-NEXT: "resultId": "1" # CHECK-NEXT: } @@ -49,7 +49,7 @@ # CHECK-NEXT: 4, # CHECK-NEXT: 1, # 
CHECK-NEXT: 0, -# CHECK-NEXT: 32771 +# CHECK-NEXT: 65539 # CHECK-NEXT: ], # Inserted at position 1 # CHECK-NEXT: "deleteCount": 0, @@ -72,12 +72,12 @@ # CHECK-NEXT: 4, # CHECK-NEXT: 1, # CHECK-NEXT: 0, -# CHECK-NEXT: 32771, +# CHECK-NEXT: 65539, # CHECK-NEXT: 1, # CHECK-NEXT: 4, # CHECK-NEXT: 1, # CHECK-NEXT: 0, -# CHECK-NEXT: 32771 +# CHECK-NEXT: 65539 # CHECK-NEXT: ], # CHECK-NEXT: "resultId": "3" # CHECK-NEXT: } diff --git a/clang-tools-extra/clangd/test/system-include-extractor.test b/clang-tools-extra/clangd/test/system-include-extractor.test index c861a2346470e..ba6aaf6efb9de 100644 --- a/clang-tools-extra/clangd/test/system-include-extractor.test +++ b/clang-tools-extra/clangd/test/system-include-extractor.test @@ -11,9 +11,11 @@ # RUN: echo '#!/bin/sh' >> %t.dir/bin/my_driver.sh # RUN: echo '[ "$0" = "%t.dir/bin/my_driver.sh" ] || exit' >> %t.dir/bin/my_driver.sh # RUN: echo 'args="$*"' >> %t.dir/bin/my_driver.sh +# Check that clangd preserves certain flags like `-nostdinc` from +# original invocation in compile_commands.json. # RUN: echo '[ -z "${args##*"-nostdinc"*}" ] || exit' >> %t.dir/bin/my_driver.sh # RUN: echo '[ -z "${args##*"-isysroot=/isysroot"*}" ] || exit' >> %t.dir/bin/my_driver.sh -# RUN: echo 'echo " $* " | grep " --sysroot /my/sysroot/path " || exit' >> %t.dir/bin/my_driver.sh +# RUN: echo '[ -z "${args##*"--sysroot /my/sysroot/path"*}" ] || exit' >> %t.dir/bin/my_driver.sh # RUN: echo 'echo line to ignore >&2' >> %t.dir/bin/my_driver.sh # RUN: echo 'printf "Target: arm-linux-gnueabihf\r\n" >&2' >> %t.dir/bin/my_driver.sh # RUN: echo 'printf "#include <...> search starts here:\r\n" >&2' >> %t.dir/bin/my_driver.sh @@ -38,7 +40,9 @@ # RUN: sed -E -e 's|"file://([A-Z]):/|"file:///\1:/|g' %t.test.1 > %t.test # Bless the mock driver we've just created so that clangd can execute it. 
-# RUN: clangd -lit-test -query-driver="**.test,**.sh" < %t.test | FileCheck -strict-whitespace %t.test +# Note: include clangd's stderr in the FileCheck input with "2>&1" so that we +# can match output lines like "ASTWorker building file" +# RUN: clangd -lit-test -query-driver="**.test,**.sh" < %t.test 2>&1 | FileCheck -strict-whitespace %t.test {"jsonrpc":"2.0","id":0,"method":"initialize","params":{}} --- { @@ -53,10 +57,25 @@ } } } +# Look for the "ASTWorker building file" line so that the subsequent diagnostics +# that are matches are for the C++ source file and not a config file. +# CHECK: ASTWorker building file # CHECK: "method": "textDocument/publishDiagnostics", # CHECK-NEXT: "params": { # CHECK-NEXT: "diagnostics": [], +# CHECK-NEXT: "uri": "file://INPUT_DIR/the-file.cpp", --- {"jsonrpc":"2.0","id":10000,"method":"shutdown"} --- {"jsonrpc":"2.0","method":"exit"} + +# Generate a different compile_commands.json which does not point to the mock driver +# RUN: echo '[{"directory": "%/t.dir", "command": "gcc the-file.cpp -nostdinc --sysroot /my/sysroot/path -isysroot=/isysroot", "file": "the-file.cpp"}]' > %t.dir/compile_commands.json + +# Generate a clangd config file which points to the mock driver instead +# RUN: echo 'CompileFlags:' > %t.dir/.clangd +# RUN: echo ' Compiler: my_driver.sh' >> %t.dir/.clangd + +# Run clangd a second time, to make sure it picks up the driver name from the config file +# Note, we need to pass -enable-config because -lit-test otherwise disables it +# RUN: clangd -lit-test -enable-config -query-driver="**.test,**.sh" < %t.test 2>&1 | FileCheck -strict-whitespace %t.test diff --git a/clang-tools-extra/clangd/tool/Check.cpp b/clang-tools-extra/clangd/tool/Check.cpp index 46752e2135639..d216c9d08e89a 100644 --- a/clang-tools-extra/clangd/tool/Check.cpp +++ b/clang-tools-extra/clangd/tool/Check.cpp @@ -101,14 +101,13 @@ class Checker { Config::current().CompileFlags.CDBSearch.FixedCDBPath; std::unique_ptr BaseCDB = 
std::make_unique(CDBOpts); - BaseCDB = getQueryDriverDatabase(llvm::makeArrayRef(Opts.QueryDriverGlobs), - std::move(BaseCDB)); auto Mangler = CommandMangler::detect(); + Mangler.SystemIncludeExtractor = + getSystemIncludeExtractor(llvm::makeArrayRef(Opts.QueryDriverGlobs)); if (Opts.ResourceDir) Mangler.ResourceDir = *Opts.ResourceDir; auto CDB = std::make_unique( - BaseCDB.get(), std::vector{}, - tooling::ArgumentsAdjuster(std::move(Mangler))); + BaseCDB.get(), std::vector{}, std::move(Mangler)); if (auto TrueCmd = CDB->getCompileCommand(File)) { Cmd = std::move(*TrueCmd); diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp index 8fdc5be68934d..7a1fb9863af1a 100644 --- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp @@ -146,7 +146,7 @@ TEST_F(BackgroundIndexTest, Config) { MemoryShardStorage MSS(Storage, CacheHits); // We need the CommandMangler, because that applies the config we're testing. OverlayCDB CDB(/*Base=*/nullptr, /*FallbackFlags=*/{}, - tooling::ArgumentsAdjuster(CommandMangler::forTests())); + CommandMangler::forTests()); BackgroundIndex Idx( FS, CDB, [&](llvm::StringRef) { return &MSS; }, std::move(Opts)); diff --git a/clang-tools-extra/clangd/unittests/ClangdTests.cpp b/clang-tools-extra/clangd/unittests/ClangdTests.cpp index fa620fda557b8..d3399f4d98e0b 100644 --- a/clang-tools-extra/clangd/unittests/ClangdTests.cpp +++ b/clang-tools-extra/clangd/unittests/ClangdTests.cpp @@ -350,7 +350,7 @@ TEST(ClangdServerTest, RespectsConfig) { Opts.ContextProvider = ClangdServer::createConfiguredContextProvider(&CfgProvider, nullptr); OverlayCDB CDB(/*Base=*/nullptr, /*FallbackFlags=*/{}, - tooling::ArgumentsAdjuster(CommandMangler::forTests())); + CommandMangler::forTests()); MockFS FS; ClangdServer Server(CDB, FS, Opts); // foo.cc sees the expected definition, as FOO is defined. 
diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index db700556e1d24..99d09ad43466a 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -2967,14 +2967,20 @@ TEST(CompletionTest, AllScopesCompletion) { } )cpp", {cls("nx::Clangd1"), cls("ny::Clangd2"), cls("Clangd3"), - cls("na::nb::Clangd4")}, + cls("na::nb::Clangd4"), enmConstant("na::C::Clangd5")}, Opts); EXPECT_THAT( Results.Completions, - UnorderedElementsAre(AllOf(qualifier("nx::"), named("Clangd1")), - AllOf(qualifier("ny::"), named("Clangd2")), - AllOf(qualifier(""), scope(""), named("Clangd3")), - AllOf(qualifier("nb::"), named("Clangd4")))); + UnorderedElementsAre(AllOf(qualifier("nx::"), named("Clangd1"), + kind(CompletionItemKind::Class)), + AllOf(qualifier("ny::"), named("Clangd2"), + kind(CompletionItemKind::Class)), + AllOf(qualifier(""), scope(""), named("Clangd3"), + kind(CompletionItemKind::Class)), + AllOf(qualifier("nb::"), named("Clangd4"), + kind(CompletionItemKind::Class)), + AllOf(qualifier("C::"), named("Clangd5"), + kind(CompletionItemKind::EnumMember)))); } TEST(CompletionTest, NoQualifierIfShadowed) { @@ -3358,6 +3364,33 @@ TEST(CompletionTest, UsingDecl) { kind(CompletionItemKind::Reference)))); } +TEST(CompletionTest, Enums) { + const char *Header(R"cpp( + namespace ns { + enum Unscoped { Clangd1 }; + class C { + enum Unscoped { Clangd2 }; + }; + enum class Scoped { Clangd3 }; + })cpp"); + const char *Source(R"cpp( + void bar() { + Clangd^ + })cpp"); + auto Index = TestTU::withHeaderCode(Header).index(); + clangd::CodeCompleteOptions Opts; + Opts.Index = Index.get(); + Opts.AllScopes = true; + auto R = completions(Source, {}, Opts); + EXPECT_THAT(R.Completions, UnorderedElementsAre( + AllOf(scope("ns::"), named("Clangd1"), + kind(CompletionItemKind::EnumMember)), + AllOf(scope("ns::C::"), named("Clangd2"), + 
kind(CompletionItemKind::EnumMember)), + AllOf(scope("ns::Scoped::"), named("Clangd3"), + kind(CompletionItemKind::EnumMember)))); +} + TEST(CompletionTest, ScopeIsUnresolved) { clangd::CodeCompleteOptions Opts = {}; Opts.AllScopes = true; diff --git a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp index 28ae6ea01e87a..504487d4e73d1 100644 --- a/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp +++ b/clang-tools-extra/clangd/unittests/CompileCommandsTests.cpp @@ -45,41 +45,47 @@ TEST(CommandMangler, Everything) { Mangler.ClangPath = testPath("fake/clang"); Mangler.ResourceDir = testPath("fake/resources"); Mangler.Sysroot = testPath("fake/sysroot"); - std::vector Cmd = {"clang++", "--", "foo.cc", "bar.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_THAT(Cmd, ElementsAre(testPath("fake/clang++"), - "-resource-dir=" + testPath("fake/resources"), - "-isysroot", testPath("fake/sysroot"), "--", - "foo.cc")); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "--", "foo.cc", "bar.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_THAT(Cmd.CommandLine, + ElementsAre(testPath("fake/clang++"), + "-resource-dir=" + testPath("fake/resources"), + "-isysroot", testPath("fake/sysroot"), "--", + "foo.cc")); } TEST(CommandMangler, FilenameMismatch) { auto Mangler = CommandMangler::forTests(); Mangler.ClangPath = testPath("clang"); // Our compile flags refer to foo.cc... - std::vector Cmd = {"clang", "foo.cc"}; + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "foo.cc"}; // but we're applying it to foo.h... - Mangler.adjust(Cmd, "foo.h"); + Mangler(Cmd, "foo.h"); // so transferCompileCommand should add -x c++-header to preserve semantics. 
- EXPECT_THAT( - Cmd, ElementsAre(testPath("clang"), "-x", "c++-header", "--", "foo.h")); + EXPECT_THAT(Cmd.CommandLine, ElementsAre(testPath("clang"), "-x", + "c++-header", "--", "foo.h")); } TEST(CommandMangler, ResourceDir) { auto Mangler = CommandMangler::forTests(); Mangler.ResourceDir = testPath("fake/resources"); - std::vector Cmd = {"clang++", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_THAT(Cmd, Contains("-resource-dir=" + testPath("fake/resources"))); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_THAT(Cmd.CommandLine, + Contains("-resource-dir=" + testPath("fake/resources"))); } TEST(CommandMangler, Sysroot) { auto Mangler = CommandMangler::forTests(); Mangler.Sysroot = testPath("fake/sysroot"); - std::vector Cmd = {"clang++", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_THAT(llvm::join(Cmd, " "), + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_THAT(llvm::join(Cmd.CommandLine, " "), HasSubstr("-isysroot " + testPath("fake/sysroot"))); } @@ -87,21 +93,22 @@ TEST(CommandMangler, ClangPath) { auto Mangler = CommandMangler::forTests(); Mangler.ClangPath = testPath("fake/clang"); - std::vector Cmd = {"clang++", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ(testPath("fake/clang++"), Cmd.front()); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ(testPath("fake/clang++"), Cmd.CommandLine.front()); - Cmd = {"unknown-binary", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ(testPath("fake/unknown-binary"), Cmd.front()); + Cmd.CommandLine = {"unknown-binary", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ(testPath("fake/unknown-binary"), Cmd.CommandLine.front()); - Cmd = {testPath("path/clang++"), "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ(testPath("path/clang++"), Cmd.front()); + Cmd.CommandLine = {testPath("path/clang++"), 
"foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ(testPath("path/clang++"), Cmd.CommandLine.front()); - Cmd = {"foo/unknown-binary", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ("foo/unknown-binary", Cmd.front()); + Cmd.CommandLine = {"foo/unknown-binary", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ("foo/unknown-binary", Cmd.CommandLine.front()); } // Only run the PATH/symlink resolving test on unix, we need to fiddle @@ -142,10 +149,11 @@ TEST(CommandMangler, ClangPathResolve) { // Test the case where the driver is an absolute path to a symlink. auto Mangler = CommandMangler::forTests(); Mangler.ClangPath = testPath("fake/clang"); - std::vector Cmd = {(TempDir + "/bin/foo").str(), "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {(TempDir + "/bin/foo").str(), "foo.cc"}; + Mangler(Cmd, "foo.cc"); // Directory based on resolved symlink, basename preserved. - EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.front()); + EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.CommandLine.front()); // Set PATH to point to temp/bin so we can find 'foo' on it. ASSERT_TRUE(::getenv("PATH")); @@ -159,21 +167,22 @@ TEST(CommandMangler, ClangPathResolve) { Mangler = CommandMangler::forTests(); Mangler.ClangPath = testPath("fake/clang"); // Driver found on PATH. - Cmd = {"foo", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); + Cmd.CommandLine = {"foo", "foo.cc"}; + Mangler(Cmd, "foo.cc"); // Found the symlink and resolved the path as above. - EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.front()); + EXPECT_EQ((TempDir + "/lib/foo").str(), Cmd.CommandLine.front()); // Symlink not resolved with -no-canonical-prefixes. 
- Cmd = {"foo", "-no-canonical-prefixes", "foo.cc"}; - Mangler.adjust(Cmd, "foo.cc"); - EXPECT_EQ((TempDir + "/bin/foo").str(), Cmd.front()); + Cmd.CommandLine = {"foo", "-no-canonical-prefixes", "foo.cc"}; + Mangler(Cmd, "foo.cc"); + EXPECT_EQ((TempDir + "/bin/foo").str(), Cmd.CommandLine.front()); } #endif TEST(CommandMangler, ConfigEdits) { auto Mangler = CommandMangler::forTests(); - std::vector Cmd = {"clang++", "foo.cc"}; + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang++", "foo.cc"}; { Config Cfg; Cfg.CompileFlags.Edits.push_back([](std::vector &Argv) { @@ -185,11 +194,11 @@ TEST(CommandMangler, ConfigEdits) { Argv = tooling::getInsertArgumentAdjuster("--hello")(Argv, ""); }); WithContextValue WithConfig(Config::Key, std::move(Cfg)); - Mangler.adjust(Cmd, "foo.cc"); + Mangler(Cmd, "foo.cc"); } // Edits are applied in given order and before other mangling and they always // go before filename. - EXPECT_THAT(Cmd, ElementsAre(_, "--hello", "--", "FOO.CC")); + EXPECT_THAT(Cmd.CommandLine, ElementsAre(_, "--hello", "--", "FOO.CC")); } static std::string strip(llvm::StringRef Arg, llvm::StringRef Argv) { @@ -363,70 +372,75 @@ TEST(PrintArgvTest, All) { TEST(CommandMangler, InputsAfterDashDash) { const auto Mangler = CommandMangler::forTests(); { - std::vector Args = {"clang", "/Users/foo.cc"}; - Mangler.adjust(Args, "/Users/foo.cc"); - EXPECT_THAT(llvm::makeArrayRef(Args).take_back(2), + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "/Users/foo.cc"}; + Mangler(Cmd, "/Users/foo.cc"); + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).take_back(2), ElementsAre("--", "/Users/foo.cc")); - EXPECT_THAT(llvm::makeArrayRef(Args).drop_back(2), + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).drop_back(2), Not(Contains("/Users/foo.cc"))); } // In CL mode /U triggers an undef operation, hence `/Users/foo.cc` shouldn't // be interpreted as a file. 
{ - std::vector Args = {"clang", "--driver-mode=cl", "bar.cc", - "/Users/foo.cc"}; - Mangler.adjust(Args, "bar.cc"); - EXPECT_THAT(llvm::makeArrayRef(Args).take_back(2), + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "--driver-mode=cl", "bar.cc", "/Users/foo.cc"}; + Mangler(Cmd, "bar.cc"); + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).take_back(2), ElementsAre("--", "bar.cc")); - EXPECT_THAT(llvm::makeArrayRef(Args).drop_back(2), Not(Contains("bar.cc"))); + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).drop_back(2), + Not(Contains("bar.cc"))); } // All inputs but the main file is dropped. { - std::vector Args = {"clang", "foo.cc", "bar.cc"}; - Mangler.adjust(Args, "baz.cc"); - EXPECT_THAT(llvm::makeArrayRef(Args).take_back(2), + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "foo.cc", "bar.cc"}; + Mangler(Cmd, "baz.cc"); + EXPECT_THAT(llvm::makeArrayRef(Cmd.CommandLine).take_back(2), ElementsAre("--", "baz.cc")); EXPECT_THAT( - llvm::makeArrayRef(Args).drop_back(2), + llvm::makeArrayRef(Cmd.CommandLine).drop_back(2), testing::AllOf(Not(Contains("foo.cc")), Not(Contains("bar.cc")))); } } TEST(CommandMangler, StripsMultipleArch) { const auto Mangler = CommandMangler::forTests(); - std::vector Args = {"clang", "-arch", "foo", - "-arch", "bar", "/Users/foo.cc"}; - Mangler.adjust(Args, "/Users/foo.cc"); - EXPECT_EQ( - llvm::count_if(Args, [](llvm::StringRef Arg) { return Arg == "-arch"; }), - 0); + tooling::CompileCommand Cmd; + Cmd.CommandLine = {"clang", "-arch", "foo", "-arch", "bar", "/Users/foo.cc"}; + Mangler(Cmd, "/Users/foo.cc"); + EXPECT_EQ(llvm::count_if(Cmd.CommandLine, + [](llvm::StringRef Arg) { return Arg == "-arch"; }), + 0); // Single arch option is preserved. 
- Args = {"clang", "-arch", "foo", "/Users/foo.cc"}; - Mangler.adjust(Args, "/Users/foo.cc"); - EXPECT_EQ( - llvm::count_if(Args, [](llvm::StringRef Arg) { return Arg == "-arch"; }), - 1); + Cmd.CommandLine = {"clang", "-arch", "foo", "/Users/foo.cc"}; + Mangler(Cmd, "/Users/foo.cc"); + EXPECT_EQ(llvm::count_if(Cmd.CommandLine, + [](llvm::StringRef Arg) { return Arg == "-arch"; }), + 1); } TEST(CommandMangler, EmptyArgs) { const auto Mangler = CommandMangler::forTests(); - std::vector Args = {}; + tooling::CompileCommand Cmd; + Cmd.CommandLine = {}; // Make sure we don't crash. - Mangler.adjust(Args, "foo.cc"); + Mangler(Cmd, "foo.cc"); } TEST(CommandMangler, PathsAsPositional) { const auto Mangler = CommandMangler::forTests(); - std::vector Args = { + tooling::CompileCommand Cmd; + Cmd.CommandLine = { "clang", "--driver-mode=cl", "-I", "foo", }; // Make sure we don't crash. - Mangler.adjust(Args, "a.cc"); - EXPECT_THAT(Args, Contains("foo")); + Mangler(Cmd, "a.cc"); + EXPECT_THAT(Cmd.CommandLine, Contains("foo")); } } // namespace } // namespace clangd diff --git a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp index 554cb0484a071..22ee0921b6552 100644 --- a/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp +++ b/clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp @@ -138,11 +138,9 @@ TEST_F(OverlayCDBTest, Watch) { TEST_F(OverlayCDBTest, Adjustments) { OverlayCDB CDB(Base.get(), {"-DFallback"}, - [](const std::vector &Cmd, llvm::StringRef File) { - auto Ret = Cmd; - Ret.push_back( + [](tooling::CompileCommand &Cmd, llvm::StringRef File) { + Cmd.CommandLine.push_back( ("-DAdjust_" + llvm::sys::path::filename(File)).str()); - return Ret; }); // Command from underlying gets adjusted. 
auto Cmd = *CDB.getCompileCommand(testPath("foo.cc")); diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index 9abc49bb06014..3ea4a58a83a70 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -382,7 +382,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { void $Function_def[[foo]]() { $Class[[F]] $LocalVariable_def[[FF]]; $Class[[G]]<$Class[[F]], &$Class[[F]]::$Method[[f]]> $LocalVariable_def[[GG]]; - $LocalVariable[[GG]].$Method[[foo]](&$LocalVariable[[FF]]); + $LocalVariable[[GG]].$Method[[foo]](&$LocalVariable_usedAsMutablePointer[[FF]]); $Class[[A]]<$Function[[foo]]> $LocalVariable_def[[AA]]; } )cpp", @@ -781,14 +781,14 @@ sizeof...($TemplateParameter[[Elements]]); const int* $LocalVariable_def_readonly[[constPtr]]; int** $LocalVariable_def[[array]]; $Function[[fun]]($LocalVariable[[val]], $LocalVariable[[val]], - $LocalVariable[[ptr]], $LocalVariable_readonly[[constPtr]], + $LocalVariable_usedAsMutablePointer[[ptr]], $LocalVariable_readonly[[constPtr]], $LocalVariable_usedAsMutableReference[[val]], $LocalVariable[[val]], $LocalVariable_usedAsMutableReference[[ptr]], $LocalVariable_readonly_usedAsMutableReference[[constPtr]], $LocalVariable_readonly[[constPtr]], - $LocalVariable[[array]], $LocalVariable_usedAsMutableReference[[array]], + $LocalVariable_usedAsMutablePointer[[array]], $LocalVariable_usedAsMutableReference[[array]], $LocalVariable[[array]] ); [](int){}($LocalVariable[[val]]); diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index 8dc7877c17849..62564b989a186 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -1316,6 +1316,11 @@ TEST_F(SymbolCollectorTest, IncludeEnums) { Black }; } 
+ class Color3 { + enum { + Blue + }; + }; )"; runSymbolCollector(Header, /*Main=*/""); EXPECT_THAT(Symbols, @@ -1324,9 +1329,11 @@ TEST_F(SymbolCollectorTest, IncludeEnums) { AllOf(qName("Color"), forCodeCompletion(true)), AllOf(qName("Green"), forCodeCompletion(true)), AllOf(qName("Color2"), forCodeCompletion(true)), - AllOf(qName("Color2::Yellow"), forCodeCompletion(false)), + AllOf(qName("Color2::Yellow"), forCodeCompletion(true)), AllOf(qName("ns"), forCodeCompletion(true)), - AllOf(qName("ns::Black"), forCodeCompletion(true)))); + AllOf(qName("ns::Black"), forCodeCompletion(true)), + AllOf(qName("Color3"), forCodeCompletion(true)), + AllOf(qName("Color3::Blue"), forCodeCompletion(true)))); } TEST_F(SymbolCollectorTest, NamelessSymbols) { diff --git a/clang-tools-extra/clangd/unittests/TestIndex.cpp b/clang-tools-extra/clangd/unittests/TestIndex.cpp index c247a9c2e90c9..11282bc34231a 100644 --- a/clang-tools-extra/clangd/unittests/TestIndex.cpp +++ b/clang-tools-extra/clangd/unittests/TestIndex.cpp @@ -69,6 +69,10 @@ Symbol enm(llvm::StringRef Name) { return sym(Name, index::SymbolKind::Enum, "@E@\\0"); } +Symbol enmConstant(llvm::StringRef Name) { + return sym(Name, index::SymbolKind::EnumConstant, "@\\0"); +} + Symbol var(llvm::StringRef Name) { return sym(Name, index::SymbolKind::Variable, "@\\0"); } diff --git a/clang-tools-extra/clangd/unittests/TestIndex.h b/clang-tools-extra/clangd/unittests/TestIndex.h index 0cd8a713c31dd..9280b0b12a67f 100644 --- a/clang-tools-extra/clangd/unittests/TestIndex.h +++ b/clang-tools-extra/clangd/unittests/TestIndex.h @@ -27,6 +27,8 @@ Symbol func(llvm::StringRef Name); Symbol cls(llvm::StringRef Name); // Creates an enum symbol. Symbol enm(llvm::StringRef Name); +// Creates an enum constant symbol. +Symbol enmConstant(llvm::StringRef Name); // Creates a variable symbol. Symbol var(llvm::StringRef Name); // Creates a namespace symbol. 
diff --git a/clang-tools-extra/clangd/unittests/TestTU.cpp b/clang-tools-extra/clangd/unittests/TestTU.cpp index 03f1cd77191d2..761d3ca60a1a1 100644 --- a/clang-tools-extra/clangd/unittests/TestTU.cpp +++ b/clang-tools-extra/clangd/unittests/TestTU.cpp @@ -64,7 +64,7 @@ ParseInputs TestTU::inputs(MockFS &FS) const { Argv.push_back(FullFilename); auto Mangler = CommandMangler::forTests(); - Mangler.adjust(Inputs.CompileCommand.CommandLine, FullFilename); + Mangler(Inputs.CompileCommand, FullFilename); Inputs.CompileCommand.Filename = FullFilename; Inputs.CompileCommand.Directory = testRoot(); Inputs.Contents = Code; diff --git a/clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp index 59788e75d1698..3449c6475e3fc 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp @@ -226,6 +226,29 @@ TEST_F(RemoveUsingNamespaceTest, All) { int main() { std::vector V; } + )cpp"}, + {// Does not qualify operators declared in a non-class context + R"cpp( + namespace ns { + struct Foo {}; + void operator+(const Foo &, int) {} + } + using namespace n^s; + int main() { + Foo foo; + foo + 10; + } + )cpp", + R"cpp( + namespace ns { + struct Foo {}; + void operator+(const Foo &, int) {} + } + + int main() { + ns::Foo foo; + foo + 10; + } )cpp"}}; for (auto C : Cases) EXPECT_EQ(C.second, apply(C.first)) << C.first; diff --git a/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp b/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp index 8791dd952319d..8a483049c8cd5 100644 --- a/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp +++ b/clang-tools-extra/test/clang-apply-replacements/Inputs/ignore-conflict/ignore-conflict.cpp @@ -1,4 +1,4 @@ class MyType {}; // CHECK: 
#include // CHECK-NEXT: #include -// CEHCK-NEXT: class MyType {}; +// CHECK-NEXT: class MyType {}; diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 83b1f09504f39..2cd66c1f7af42 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -63,6 +63,7 @@ if(CLANG_BUILT_STANDALONE) include(TableGen) include(HandleLLVMOptions) include(VersionFromVCS) + include(CheckAtomic) include(GetErrcMessages) include(LLVMDistributionSupport) @@ -92,9 +93,14 @@ if(CLANG_BUILT_STANDALONE) set(LLVM_UTILS_PROVIDED ON) endif() + # Seek installed Lit. + find_program(LLVM_LIT + NAMES llvm-lit lit.py lit + PATHS "${LLVM_MAIN_SRC_DIR}/utils/lit" + DOC "Path to lit.py") + if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) # Note: path not really used, except for checking if lit was found - set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/llvm-lit) add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit utils/llvm-lit) endif() @@ -111,12 +117,6 @@ if(CLANG_BUILT_STANDALONE) AND EXISTS ${UNITTEST_DIR}/CMakeLists.txt) add_subdirectory(${UNITTEST_DIR} utils/unittest) endif() - else() - # Seek installed Lit. - find_program(LLVM_LIT - NAMES llvm-lit lit.py lit - PATHS "${LLVM_MAIN_SRC_DIR}/utils/lit" - DOC "Path to lit.py") endif() if(LLVM_LIT) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 0416190a2fd52..44f05cf28270b 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -865,7 +865,7 @@ the configuration (without a prefix: ``Auto``). Alignment options * ``TrailingCommentsAlignmentKinds Kind`` - Specifies the way to align trailing comments + Specifies the way to align trailing comments. Possible values: @@ -903,8 +903,9 @@ the configuration (without a prefix: ``Auto``). 
int abcd; // comment - * ``unsigned OverEmptyLines`` How many empty lines to apply alignment - With ``MaxEmptyLinesToKeep`` is 2 and ``OverEmptyLines`` is 2, + * ``unsigned OverEmptyLines`` How many empty lines to apply alignment. + When both ``MaxEmptyLinesToKeep`` and ``OverEmptyLines`` are set to 2, + it formats like below. .. code-block:: c++ @@ -915,7 +916,8 @@ the configuration (without a prefix: ``Auto``). int abcdef; // aligned - And with ``MaxEmptyLinesToKeep`` is 2 and ``OverEmptyLines`` is 1, + When ``MaxEmptyLinesToKeep`` is set to 2 and ``OverEmptyLines`` is set + to 1, it formats like below. .. code-block:: c++ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7697f10daeef0..2ce5fd48ca13a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -161,6 +161,21 @@ code bases. - The ``-fexperimental-new-pass-manager`` and ``-fno-legacy-pass-manager`` flags have been removed. These have been no-ops since 15.0.0. +- As a side effect of implementing DR692/DR1395/DR1432, Clang now rejects some + overloaded function templates as ambiguous when one of the candidates has a + trailing parameter pack. + + .. code-block:: c++ + + template void g(T, T = T()); + template void g(T, U...); + void h() { + // This is rejected due to ambiguity between the pack and the + // default argument. Only parameters with arguments are considered during + // partial ordering of function templates. + g(42); + } + What's New in Clang |release|? ============================== Some of the major new features and improvements to Clang are listed @@ -274,6 +289,10 @@ Bug Fixes result in a stack overflow. `Issue 44304 `_ `Issue 50891 `_ +- Clang 14 predeclared some builtin POSIX library functions in ``gnu2x`` mode, + and Clang 15 accidentally stopped predeclaring those functions in that + language mode. Clang 16 now predeclares those functions again. This fixes + `Issue 56607 `_. 
Improvements to Clang's diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -347,17 +366,23 @@ Improvements to Clang's diagnostics - Clang now correctly points to the problematic parameter for the ``-Wnonnull`` warning. This fixes `Issue 58273 `_. -- Introduced ``-Wcast-function-type-strict`` to warn about function type mismatches - in casts that may result in runtime indirect call `Control-Flow Integrity (CFI) - `_ failures. This diagnostic - is grouped under ``-Wcast-function-type`` as it identifies a more strict set of - potentially problematic function type casts. +- Introduced ``-Wcast-function-type-strict`` and + ``-Wincompatible-function-pointer-types-strict`` to warn about function type + mismatches in casts and assignments that may result in runtime indirect call + `Control-Flow Integrity (CFI) + `_ failures. The + ``-Wcast-function-type-strict`` diagnostic is grouped under + ``-Wcast-function-type`` as it identifies a more strict set of potentially + problematic function type casts. - Clang will now disambiguate NTTP types when printing diagnostic that contain NTTP types. Fixes `Issue 57562 `_. - Better error recovery for pack expansion of expressions. `Issue 58673 `_. - Better diagnostics when the user has missed `auto` in a declaration. `Issue 49129 `_. +- Clang now diagnoses use of invalid or reserved module names in a module + export declaration. Both are diagnosed as an error, but the diagnostic is + suppressed for use of reserved names in a system header. Non-comprehensive list of changes in this release ------------------------------------------------- @@ -547,14 +572,15 @@ C2x Feature Support C++ Language Changes in Clang ----------------------------- -- Implemented DR692, DR1395 and DR1432. Use the ``-fclang-abi-compat=15`` option - to get the old partial ordering behavior regarding packs. Note that the fix for - DR1432 is speculative that there is no wording or even resolution for this issue. 
- A speculative fix for DR1432 is needed because it fixes regressions caused by DR692. +- Implemented `DR692 `_, `DR1395 `_, + and `DR1432 `_. The fix for DR1432 is speculative since the + issue is still open and has no proposed resolution at this time. A speculative fix + for DR1432 is needed to prevent regressions that would otherwise occur due to DR692. - Clang's default C++/ObjC++ standard is now ``gnu++17`` instead of ``gnu++14``. This means Clang will by default accept code using features from C++17 and conforming GNU extensions. Projects incompatible with C++17 can add ``-std=gnu++14`` to their build settings to restore the previous behaviour. +- Implemented DR2358 allowing init captures in lambdas in default arguments. C++20 Feature Support ^^^^^^^^^^^^^^^^^^^^^ @@ -615,6 +641,9 @@ C++2b Feature Support CUDA/HIP Language Changes in Clang ---------------------------------- + - Allow the use of ``__noinline__`` as a keyword (instead of ``__attribute__((noinline))``) + in lambda declarations. + Objective-C Language Changes in Clang ------------------------------------- @@ -651,6 +680,7 @@ RISC-V Support in Clang ----------------------- - ``sifive-7-rv32`` and ``sifive-7-rv64`` are no longer supported for ``-mcpu``. Use ``sifive-e76``, ``sifive-s76``, or ``sifive-u74`` instead. +- Native detections via ``-mcpu=native`` and ``-mtune=native`` are supported. X86 Support in Clang -------------------- @@ -682,6 +712,7 @@ X86 Support in Clang * Support intrinsic of ``_mm(256)_cvtneobf16_ps``. * Support intrinsic of ``_mm(256)_cvtneoph_ps``. * Support intrinsic of ``_mm(256)_cvtneps_avx_pbh``. +- ``-march=raptorlake`` and ``-march=meteorlake`` are now supported. WebAssembly Support in Clang ---------------------------- @@ -714,6 +745,9 @@ Arm and AArch64 Support in Clang them, which it cannot. - Add driver and tuning support for Neoverse V2 via the flag ``-mcpu=neoverse-v2``. Native detection is also supported via ``-mcpu=native``. 
+- Support has been added for the following processors (-mcpu identifiers in parenthesis): + + * Arm Cortex-A715 (cortex-a715). Floating Point Support in Clang ------------------------------- diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 11bc5c9066111..9b03db9e0f742 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -2241,6 +2241,15 @@ usual build cycle when using sample profilers for optimization: $ clang++ -O2 -gline-tables-only -fprofile-sample-use=code.prof code.cc -o code + [OPTIONAL] Sampling-based profiles can have inaccuracies or missing block/ + edge counters. The profile inference algorithm (profi) can be used to infer + missing blocks and edge counts, and improve the quality of profile data. + Enable it with ``-fsample-profile-use-profi``. + + .. code-block:: console + + $ clang++ -O2 -gline-tables-only -fprofile-sample-use=code.prof \ + -fsample-profile-use-profi code.cc -o code Sample Profile Formats """""""""""""""""""""" diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index c1c47d4575a6a..9b76c29d471e2 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -1978,7 +1978,11 @@ enum CXCursorKind { */ CXCursor_OMPParallelMaskedTaskLoopSimdDirective = 304, - CXCursor_LastStmt = CXCursor_OMPParallelMaskedTaskLoopSimdDirective, + /** OpenMP error directive. + */ + CXCursor_OMPErrorDirective = 305, + + CXCursor_LastStmt = CXCursor_OMPErrorDirective, /** * Cursor that represents the translation unit itself. 
diff --git a/clang/include/clang/AST/ASTUnresolvedSet.h b/clang/include/clang/AST/ASTUnresolvedSet.h index 8d2b23b3539a2..398ffb188c95b 100644 --- a/clang/include/clang/AST/ASTUnresolvedSet.h +++ b/clang/include/clang/AST/ASTUnresolvedSet.h @@ -69,7 +69,12 @@ class ASTUnresolvedSet { return false; } - void erase(unsigned I) { Decls[I] = Decls.pop_back_val(); } + void erase(unsigned I) { + if (I == Decls.size() - 1) + Decls.pop_back(); + else + Decls[I] = Decls.pop_back_val(); + } void clear() { Decls.clear(); } diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 9a67ee894e71e..496bf26c1c3fc 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3168,6 +3168,10 @@ DEF_TRAVERSE_STMT(OMPParallelGenericLoopDirective, DEF_TRAVERSE_STMT(OMPTargetParallelGenericLoopDirective, { TRY_TO(TraverseOMPExecutableDirective(S)); }) + +DEF_TRAVERSE_STMT(OMPErrorDirective, + { TRY_TO(TraverseOMPExecutableDirective(S)); }) + // OpenMP clauses. template bool RecursiveASTVisitor::TraverseOMPClause(OMPClause *C) { diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h index 702a82537ab2d..baa5e0ed7b63f 100644 --- a/clang/include/clang/AST/StmtOpenMP.h +++ b/clang/include/clang/AST/StmtOpenMP.h @@ -6220,6 +6220,51 @@ class OMPTargetParallelGenericLoopDirective final : public OMPLoopDirective { return T->getStmtClass() == OMPTargetParallelGenericLoopDirectiveClass; } }; + +/// This represents '#pragma omp error' directive. +/// +/// \code +/// #pragma omp error +/// \endcode +class OMPErrorDirective final : public OMPExecutableDirective { + friend class ASTStmtReader; + friend class OMPExecutableDirective; + /// Build directive with the given start and end location. + /// + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending location of the directive. 
+ /// + OMPErrorDirective(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPExecutableDirective(OMPErrorDirectiveClass, llvm::omp::OMPD_error, + StartLoc, EndLoc) {} + /// Build an empty directive. + /// + explicit OMPErrorDirective() + : OMPExecutableDirective(OMPErrorDirectiveClass, llvm::omp::OMPD_error, + SourceLocation(), SourceLocation()) {} + +public: + /// + /// \param C AST context. + /// \param StartLoc Starting location of the directive kind. + /// \param EndLoc Ending Location of the directive. + /// \param Clauses List of clauses. + /// + static OMPErrorDirective *Create(const ASTContext &C, SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses); + + /// Creates an empty directive. + /// + /// \param C AST context. + /// + static OMPErrorDirective *CreateEmpty(const ASTContext &C, + unsigned NumClauses, EmptyShell); + + static bool classof(const Stmt *T) { + return T->getStmtClass() == OMPErrorDirectiveClass; + } +}; } // end namespace clang #endif diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index efea46b4a0c5b..e362d79263ff2 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -48,6 +48,13 @@ enum class SkipPast { ReferenceThenPointer, }; +/// Indicates the result of a tentative comparison. +enum class ComparisonResult { + Same, + Different, + Unknown, +}; + /// Holds the state of the program (store and heap) at a given program point. /// /// WARNING: Symbolic values that are created by the environment for static @@ -62,7 +69,11 @@ class Environment { public: virtual ~ValueModel() = default; - /// Returns true if and only if `Val1` is equivalent to `Val2`. + /// Returns: + /// `Same`: `Val1` is equivalent to `Val2`, according to the model. + /// `Different`: `Val1` is distinct from `Val2`, according to the model. 
+ /// `Unknown`: The model can't determine a relationship between `Val1` and + /// `Val2`. /// /// Requirements: /// @@ -72,16 +83,16 @@ class Environment { /// /// `Val1` and `Val2` must be assigned to the same storage location in /// `Env1` and `Env2` respectively. - virtual bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) { + virtual ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) { // FIXME: Consider adding QualType to StructValue and removing the Type // argument here. // - // FIXME: default to a sound comparison and/or expand the comparison logic - // built into the framework to support broader forms of equivalence than - // strict pointer equality. - return true; + // FIXME: default to a sound comparison (`Unknown`) and/or expand the + // comparison logic built into the framework to support broader forms of + // equivalence than strict pointer equality. + return ComparisonResult::Same; } /// Modifies `MergedVal` to approximate both `Val1` and `Val2`. 
This could diff --git a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h index 66aabb531a213..b053a10327c3f 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h +++ b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h @@ -54,9 +54,9 @@ class UncheckedOptionalAccessModel void transfer(const CFGElement *Elt, NoopLattice &L, Environment &Env); - bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override; + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) override; bool merge(QualType Type, const Value &Val1, const Environment &Env1, const Value &Val2, const Environment &Env2, Value &MergedVal, diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 0137bd69b5285..fc65cd9778836 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -332,6 +332,10 @@ VALUE_CODEGENOPT(StackProbeSize , 32, 4096) ///< Overrides default stack VALUE_CODEGENOPT(WarnStackSize , 32, UINT_MAX) ///< Set via -fwarn-stack-size. CODEGENOPT(NoStackArgProbe, 1, 0) ///< Set when -mno-stack-arg-probe is used CODEGENOPT(DebugStrictDwarf, 1, 1) ///< Whether or not to use strict DWARF info. + +CODEGENOPT(EnableAssignmentTracking, 1,0) ///< Enable the Assignment Tracking + ///< debug info feature feature. + CODEGENOPT(DebugColumnInfo, 1, 0) ///< Whether or not to use column information ///< in debug info. 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 348a17afe4951..205043ad93548 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8282,6 +8282,9 @@ def err_typecheck_convert_incompatible_function_pointer : Error< def ext_typecheck_convert_incompatible_function_pointer : ExtWarn< err_typecheck_convert_incompatible_function_pointer.Text>, InGroup, DefaultError; +def warn_typecheck_convert_incompatible_function_pointer_strict : Warning< + err_typecheck_convert_incompatible_function_pointer.Text>, + InGroup>, DefaultIgnore; def ext_typecheck_convert_discards_qualifiers : ExtWarn< "%select{%diff{assigning to $ from $|assigning to different types}0,1" "|%diff{passing $ to parameter of type $|" @@ -11282,6 +11285,8 @@ def err_private_module_fragment_not_module_interface : Error< "private module fragment in module implementation unit">; def note_not_module_interface_add_export : Note< "add 'export' here if this is intended to be a module interface unit">; +def err_invalid_module_name : Error< + "%0 is %select{an invalid|a reserved}1 name for a module">; def ext_equivalent_internal_linkage_decl_in_modules : ExtWarn< "ambiguous use of internal linkage declaration %0 defined in multiple modules">, diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 3e9669ced1009..c41ae41737898 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -159,7 +159,8 @@ class alignas(8) Module { /// eventually be exposed, for use in "private" modules. std::string ExportAsModule; - /// Does this Module scope describe part of the purview of a named C++ module? + /// Does this Module scope describe part of the purview of a standard named + /// C++ module? 
bool isModulePurview() const { return Kind == ModuleInterfaceUnit || Kind == ModulePartitionInterface || Kind == ModulePartitionImplementation || diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 70742b86dd527..8a1b6b792c0a5 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -299,3 +299,4 @@ def OMPTeamsGenericLoopDirective : StmtNode; def OMPTargetTeamsGenericLoopDirective : StmtNode; def OMPParallelGenericLoopDirective : StmtNode; def OMPTargetParallelGenericLoopDirective : StmtNode; +def OMPErrorDirective : StmtNode; diff --git a/clang/include/clang/Basic/arm_fp16.td b/clang/include/clang/Basic/arm_fp16.td index 79cd16233c104..cb2a09303e8e1 100644 --- a/clang/include/clang/Basic/arm_fp16.td +++ b/clang/include/clang/Basic/arm_fp16.td @@ -14,7 +14,7 @@ include "arm_neon_incl.td" // ARMv8.2-A FP16 intrinsics. -let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in { +let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { // Negate def VNEGSH : SInst<"vneg", "11", "Sh">; diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index 01d66b30b0386..1aaf93ddb7c43 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -76,6 +76,7 @@ class Distro { UbuntuImpish, UbuntuJammy, UbuntuKinetic, + UbuntuLunar, UnknownDistro }; @@ -127,7 +128,7 @@ class Distro { } bool IsUbuntu() const { - return DistroVal >= UbuntuHardy && DistroVal <= UbuntuKinetic; + return DistroVal >= UbuntuHardy && DistroVal <= UbuntuLunar; } bool IsAlpineLinux() const { return DistroVal == AlpineLinux; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 605389307b9a9..b6c53d8acbc46 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1288,6 +1288,13 @@ def fprofile_sample_accurate : Flag<["-"], 
"fprofile-sample-accurate">, as cold. Otherwise, treat callsites without profile samples as if we have no profile}]>, MarshallingInfoFlag>; +def fsample_profile_use_profi : Flag<["-"], "fsample-profile-use-profi">, + Flags<[NoXarchOption, CC1Option]>, Group, + HelpText<"Use profi to infer block and edge counts">, + DocBrief<[{Infer block and edge counts. If the profiles have errors or missing + blocks caused by sampling, profile inference (profi) can convert + basic block counts to branch probabilites to fix them by extended + and re-engineered classic MCMF (min-cost max-flow) approach.}]>; def fno_profile_sample_accurate : Flag<["-"], "fno-profile-sample-accurate">, Group, Flags<[NoXarchOption]>; def fauto_profile : Flag<["-"], "fauto-profile">, Group, @@ -1795,7 +1802,7 @@ defm sanitize_address_globals_dead_stripping : BoolOption<"f", "sanitize-address NegFlag>, Group; defm sanitize_address_use_odr_indicator : BoolOption<"f", "sanitize-address-use-odr-indicator", - CodeGenOpts<"SanitizeAddressUseOdrIndicator">, DefaultFalse, + CodeGenOpts<"SanitizeAddressUseOdrIndicator">, DefaultTrue, PosFlag, NegFlag>, @@ -1925,11 +1932,11 @@ def fassociative_math : Flag<["-"], "fassociative-math">, Group; def fno_associative_math : Flag<["-"], "fno-associative-math">, Group; defm reciprocal_math : BoolFOption<"reciprocal-math", LangOpts<"AllowRecip">, DefaultFalse, - PosFlag, NegFlag>; defm approx_func : BoolFOption<"approx-func", LangOpts<"ApproxFunc">, DefaultFalse, - PosFlag, NegFlag>; @@ -1939,7 +1946,7 @@ defm finite_math_only : BoolFOption<"finite-math-only", NegFlag>; defm signed_zeros : BoolFOption<"signed-zeros", LangOpts<"NoSignedZero">, DefaultFalse, - NegFlag, PosFlag>; def fhonor_nans : Flag<["-"], "fhonor-nans">, Group; @@ -5636,15 +5643,6 @@ def mframe_pointer_EQ : Joined<["-"], "mframe-pointer=">, HelpText<"Specify which frame pointers to retain.">, Values<"all,non-leaf,none">, NormalizedValuesScope<"CodeGenOptions::FramePointerKind">, NormalizedValues<["All", 
"NonLeaf", "None"]>, MarshallingInfoEnum, "None">; -def menable_no_infinities : Flag<["-"], "menable-no-infs">, - HelpText<"Allow optimization to assume there are no infinities.">, - MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; -def menable_no_nans : Flag<["-"], "menable-no-nans">, - HelpText<"Allow optimization to assume there are no NaNs.">, - MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; -def mreassociate : Flag<["-"], "mreassociate">, - HelpText<"Allow reassociation transformations for floating-point instructions">, - MarshallingInfoFlag>, ImpliedByAnyOf<[funsafe_math_optimizations.KeyPath]>; def mabi_EQ_ieeelongdouble : Flag<["-"], "mabi=ieeelongdouble">, HelpText<"Use IEEE 754 quadruple-precision for long double">, MarshallingInfoFlag>; @@ -5830,6 +5828,11 @@ def fctor_dtor_return_this : Flag<["-"], "fctor-dtor-return-this">, } // let Flags = [CC1Option, NoDriverOption] +def fexperimental_assignment_tracking : + Flag<["-"], "fexperimental-assignment-tracking">, Group, + HelpText<"Enable assignment tracking debug info">, + MarshallingInfoFlag>; + //===----------------------------------------------------------------------===// // Dependency Output Options //===----------------------------------------------------------------------===// @@ -6256,6 +6259,16 @@ def split_dwarf_output : Separate<["-"], "split-dwarf-output">, let Flags = [CC1Option, FC1Option, NoDriverOption] in { +def mreassociate : Flag<["-"], "mreassociate">, + HelpText<"Allow reassociation transformations for floating-point instructions">, + MarshallingInfoFlag>, ImpliedByAnyOf<[funsafe_math_optimizations.KeyPath]>; +def menable_no_nans : Flag<["-"], "menable-no-nans">, + HelpText<"Allow optimization to assume there are no NaNs.">, + MarshallingInfoFlag>, ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; +def menable_no_infinities : Flag<["-"], "menable-no-infs">, + HelpText<"Allow optimization to assume there are no infinities.">, + MarshallingInfoFlag>, 
ImpliedByAnyOf<[ffinite_math_only.KeyPath]>; + def pic_level : Separate<["-"], "pic-level">, HelpText<"Value for __PIC__">, MarshallingInfoInt>; diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 6981dc158d241..3cadb6304dced 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -402,10 +402,11 @@ struct FormatStyle { /// Alignment options struct TrailingCommentsAlignmentStyle { - /// Specifies the way to align trailing comments + /// Specifies the way to align trailing comments. TrailingCommentsAlignmentKinds Kind; - /// How many empty lines to apply alignment - /// With ``MaxEmptyLinesToKeep`` is 2 and ``OverEmptyLines`` is 2, + /// How many empty lines to apply alignment. + /// When both ``MaxEmptyLinesToKeep`` and ``OverEmptyLines`` are set to 2, + /// it formats like below. /// \code /// int a; // all these /// @@ -414,7 +415,9 @@ struct FormatStyle { /// /// int abcdef; // aligned /// \endcode - /// And with ``MaxEmptyLinesToKeep`` is 2 and ``OverEmptyLines`` is 1, + /// + /// When ``MaxEmptyLinesToKeep`` is set to 2 and ``OverEmptyLines`` is set + /// to 1, it formats like below. /// \code /// int a; // these are /// diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ac03a427f419d..235db80047195 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11682,6 +11682,10 @@ class Sema final { /// Called on well-formed '\#pragma omp taskyield'. StmtResult ActOnOpenMPTaskyieldDirective(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed '\#pragma omp error'. + StmtResult ActOnOpenMPErrorDirective(ArrayRef Clauses, + SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed '\#pragma omp barrier'. StmtResult ActOnOpenMPBarrierDirective(SourceLocation StartLoc, SourceLocation EndLoc); @@ -12620,6 +12624,12 @@ class Sema final { /// extension. 
IncompatibleFunctionPointer, + /// IncompatibleFunctionPointerStrict - The assignment is between two + /// function pointer types that are not identical, but are compatible, + /// unless compiled with -fsanitize=cfi, in which case the type mismatch + /// may trip an indirect call runtime check. + IncompatibleFunctionPointerStrict, + /// IncompatiblePointerSign - The assignment is between two pointers types /// which point to integers which have a different sign, but are otherwise /// identical. This is a subset of the above, but broken out because it's by diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index a68b490c04803..7090ec86240dd 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1937,6 +1937,7 @@ enum StmtCode { STMT_OMP_PARALLEL_SECTIONS_DIRECTIVE, STMT_OMP_TASK_DIRECTIVE, STMT_OMP_TASKYIELD_DIRECTIVE, + STMT_OMP_ERROR_DIRECTIVE, STMT_OMP_BARRIER_DIRECTIVE, STMT_OMP_TASKWAIT_DIRECTIVE, STMT_OMP_FLUSH_DIRECTIVE, diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index cb929fc19bd21..09ee1744e8945 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -736,7 +736,7 @@ class ASTWriter : public ASTDeserializationListener, bool hasChain() const { return Chain; } ASTReader *getChain() const { return Chain; } - bool isWritingNamedModules() const { + bool isWritingStdCXXNamedModules() const { return WritingModule && WritingModule->isModulePurview(); } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index c1878a49a81aa..683e54df5e332 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12321,16 +12321,14 @@ static Decl *getCommonDecl(Decl *X, Decl *Y) { llvm_unreachable("Corrupt redecls chain"); } -template ::value, bool> = true> +template , bool> = true> T 
*getCommonDecl(T *X, T *Y) { return cast_or_null( getCommonDecl(const_cast(cast_or_null(X)), const_cast(cast_or_null(Y)))); } -template ::value, bool> = true> +template , bool> = true> T *getCommonDeclChecked(T *X, T *Y) { return cast(getCommonDecl(const_cast(cast(X)), const_cast(cast(Y)))); diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index b27bc051eb656..52536704d0d00 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -200,8 +200,8 @@ namespace clang { // cast the return value to `T`. template auto import(T *From) - -> std::conditional_t::value, - Expected, Expected> { + -> std::conditional_t, Expected, + Expected> { auto ToOrErr = Importer.Import(From); if (!ToOrErr) return ToOrErr.takeError(); diff --git a/clang/lib/AST/Comment.cpp b/clang/lib/AST/Comment.cpp index eaa235bbe6103..4cf3bb39c4e81 100644 --- a/clang/lib/AST/Comment.cpp +++ b/clang/lib/AST/Comment.cpp @@ -29,7 +29,7 @@ namespace comments { #undef ABSTRACT_COMMENT // DeclInfo is also allocated with a BumpPtrAllocator. -static_assert(std::is_trivially_destructible::value, +static_assert(std::is_trivially_destructible_v, "DeclInfo should be trivially destructible!"); const char *Comment::getCommentKindName() const { diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 3d967666cc914..eeee76612ac4e 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -188,8 +188,7 @@ static bool usesTypeVisibility(const NamedDecl *D) { /// Does the given declaration have member specialization information, /// and if so, is it an explicit specialization? 
template -static std::enable_if_t::value, - bool> +static std::enable_if_t, bool> isExplicitMemberSpecialization(const T *D) { if (const MemberSpecializationInfo *member = D->getMemberSpecializationInfo()) { diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp index 12e62d95825ef..1a3cdc0e404a1 100644 --- a/clang/lib/AST/ExprClassification.cpp +++ b/clang/lib/AST/ExprClassification.cpp @@ -160,7 +160,6 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::CXXPseudoDestructorExprClass: case Expr::UnaryExprOrTypeTraitExprClass: case Expr::CXXNewExprClass: - case Expr::CXXThisExprClass: case Expr::CXXNullPtrLiteralExprClass: case Expr::ImaginaryLiteralClass: case Expr::GNUNullExprClass: @@ -209,6 +208,10 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::SYCLBuiltinBaseTypeExprClass: return Cl::CL_PRValue; + // Make HLSL this reference-like + case Expr::CXXThisExprClass: + return Lang.HLSL ? Cl::CL_LValue : Cl::CL_PRValue; + case Expr::ConstantExprClass: return ClassifyInternal(Ctx, cast(E)->getSubExpr()); diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 516f77cd3d602..91b9809861d8b 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -160,9 +160,7 @@ bool ByteCodeExprGen::VisitIntegerLiteral(const IntegerLiteral *LE) { if (DiscardResult) return true; - if (Optional T = classify(LE->getType())) - return emitConst(*T, LE->getValue(), LE); - return this->bail(LE); + return this->emitConst(LE->getValue(), LE); } template @@ -190,67 +188,120 @@ bool ByteCodeExprGen::VisitBinaryOperator(const BinaryOperator *BO) { // Typecheck the args. 
Optional LT = classify(LHS->getType()); Optional RT = classify(RHS->getType()); - if (!LT || !RT) { + Optional T = classify(BO->getType()); + if (!LT || !RT || !T) { return this->bail(BO); } - if (Optional T = classify(BO->getType())) { - if (!visit(LHS)) + auto Discard = [this, T, BO](bool Result) { + if (!Result) return false; - if (!visit(RHS)) + return DiscardResult ? this->emitPop(*T, BO) : true; + }; + + // Pointer arithmetic special case. + if (BO->getOpcode() == BO_Add || BO->getOpcode() == BO_Sub) { + if (*T == PT_Ptr || (*LT == PT_Ptr && *RT == PT_Ptr)) + return this->VisitPointerArithBinOp(BO); + } + + if (!visit(LHS) || !visit(RHS)) + return false; + + switch (BO->getOpcode()) { + case BO_EQ: + return Discard(this->emitEQ(*LT, BO)); + case BO_NE: + return Discard(this->emitNE(*LT, BO)); + case BO_LT: + return Discard(this->emitLT(*LT, BO)); + case BO_LE: + return Discard(this->emitLE(*LT, BO)); + case BO_GT: + return Discard(this->emitGT(*LT, BO)); + case BO_GE: + return Discard(this->emitGE(*LT, BO)); + case BO_Sub: + return Discard(this->emitSub(*T, BO)); + case BO_Add: + return Discard(this->emitAdd(*T, BO)); + case BO_Mul: + return Discard(this->emitMul(*T, BO)); + case BO_Rem: + return Discard(this->emitRem(*T, BO)); + case BO_Div: + return Discard(this->emitDiv(*T, BO)); + case BO_Assign: + if (DiscardResult) + return this->emitStorePop(*T, BO); + return this->emitStore(*T, BO); + case BO_And: + return Discard(this->emitBitAnd(*T, BO)); + case BO_Or: + return Discard(this->emitBitOr(*T, BO)); + case BO_Shl: + return Discard(this->emitShl(*LT, *RT, BO)); + case BO_Shr: + return Discard(this->emitShr(*LT, *RT, BO)); + case BO_Xor: + return Discard(this->emitBitXor(*T, BO)); + case BO_LAnd: + case BO_LOr: + default: + return this->bail(BO); + } + + llvm_unreachable("Unhandled binary op"); +} + +/// Perform addition/subtraction of a pointer and an integer or +/// subtraction of two pointers. 
+template +bool ByteCodeExprGen::VisitPointerArithBinOp(const BinaryOperator *E) { + BinaryOperatorKind Op = E->getOpcode(); + const Expr *LHS = E->getLHS(); + const Expr *RHS = E->getRHS(); + + if ((Op != BO_Add && Op != BO_Sub) || + (!LHS->getType()->isPointerType() && !RHS->getType()->isPointerType())) + return false; + + Optional LT = classify(LHS); + Optional RT = classify(RHS); + + if (!LT || !RT) + return false; + + if (LHS->getType()->isPointerType() && RHS->getType()->isPointerType()) { + if (Op != BO_Sub) return false; - auto Discard = [this, T, BO](bool Result) { - if (!Result) - return false; - return DiscardResult ? this->emitPop(*T, BO) : true; - }; - - switch (BO->getOpcode()) { - case BO_EQ: - return Discard(this->emitEQ(*LT, BO)); - case BO_NE: - return Discard(this->emitNE(*LT, BO)); - case BO_LT: - return Discard(this->emitLT(*LT, BO)); - case BO_LE: - return Discard(this->emitLE(*LT, BO)); - case BO_GT: - return Discard(this->emitGT(*LT, BO)); - case BO_GE: - return Discard(this->emitGE(*LT, BO)); - case BO_Sub: - return Discard(this->emitSub(*T, BO)); - case BO_Add: - return Discard(this->emitAdd(*T, BO)); - case BO_Mul: - return Discard(this->emitMul(*T, BO)); - case BO_Rem: - return Discard(this->emitRem(*T, BO)); - case BO_Div: - return Discard(this->emitDiv(*T, BO)); - case BO_Assign: - if (!this->emitStore(*T, BO)) - return false; - return DiscardResult ? 
this->emitPopPtr(BO) : true; - case BO_And: - return Discard(this->emitBitAnd(*T, BO)); - case BO_Or: - return Discard(this->emitBitOr(*T, BO)); - case BO_Shl: - return Discard(this->emitShl(*LT, *RT, BO)); - case BO_Shr: - return Discard(this->emitShr(*LT, *RT, BO)); - case BO_Xor: - return Discard(this->emitBitXor(*T, BO)); - case BO_LAnd: - case BO_LOr: - default: - return this->bail(BO); - } + assert(E->getType()->isIntegerType()); + if (!visit(RHS) || !visit(LHS)) + return false; + + return this->emitSubPtr(classifyPrim(E->getType()), E); } - return this->bail(BO); + PrimType OffsetType; + if (LHS->getType()->isIntegerType()) { + if (!visit(RHS) || !visit(LHS)) + return false; + OffsetType = *LT; + } else if (RHS->getType()->isIntegerType()) { + if (!visit(LHS) || !visit(RHS)) + return false; + OffsetType = *RT; + } else { + return false; + } + + if (Op == BO_Add) + return this->emitAddOffset(OffsetType, E); + else if (Op == BO_Sub) + return this->emitSubOffset(OffsetType, E); + + return this->bail(E); } template @@ -266,26 +317,26 @@ bool ByteCodeExprGen::VisitArraySubscriptExpr( const ArraySubscriptExpr *E) { const Expr *Base = E->getBase(); const Expr *Index = E->getIdx(); + PrimType IndexT = classifyPrim(Index->getType()); // Take pointer of LHS, add offset from RHS, narrow result. // What's left on the stack after this is a pointer. 
- if (Optional IndexT = classify(Index->getType())) { - if (!this->visit(Base)) - return false; + if (!this->visit(Base)) + return false; - if (!this->visit(Index)) - return false; + if (!this->visit(Index)) + return false; - if (!this->emitAddOffset(*IndexT, E)) - return false; + if (!this->emitAddOffset(IndexT, E)) + return false; - if (!this->emitNarrowPtr(E)) - return false; + if (!this->emitNarrowPtr(E)) + return false; - return true; - } + if (DiscardResult) + return this->emitPopPtr(E); - return false; + return true; } template @@ -327,7 +378,7 @@ bool ByteCodeExprGen::VisitUnaryExprOrTypeTraitExpr( Size = Ctx.getASTContext().getTypeSizeInChars(ArgType); } - return this->emitConst(E, Size.getQuantity()); + return this->emitConst(Size.getQuantity(), E); } return false; @@ -367,9 +418,7 @@ bool ByteCodeExprGen::VisitArrayInitIndexExpr( // stand-alone, e.g. via EvaluateAsInt(). if (!ArrayIndex) return false; - QualType IndexType = E->getType(); - APInt Value(getIntWidth(IndexType), *ArrayIndex); - return this->emitConst(classifyPrim(IndexType), Value, E); + return this->emitConst(*ArrayIndex, E); } template @@ -417,7 +466,7 @@ bool ByteCodeExprGen::VisitStringLiteral(const StringLiteral *E) { template bool ByteCodeExprGen::VisitCharacterLiteral( const CharacterLiteral *E) { - return this->emitConst(E, E->getValue()); + return this->emitConst(E->getValue(), E); } template @@ -662,27 +711,27 @@ bool ByteCodeExprGen::dereferenceVar( } template -bool ByteCodeExprGen::emitConst(PrimType T, const APInt &Value, - const Expr *E) { - switch (T) { +template +bool ByteCodeExprGen::emitConst(T Value, const Expr *E) { + switch (classifyPrim(E->getType())) { case PT_Sint8: - return this->emitConstSint8(Value.getSExtValue(), E); + return this->emitConstSint8(Value, E); case PT_Uint8: - return this->emitConstUint8(Value.getZExtValue(), E); + return this->emitConstUint8(Value, E); case PT_Sint16: - return this->emitConstSint16(Value.getSExtValue(), E); + return 
this->emitConstSint16(Value, E); case PT_Uint16: - return this->emitConstUint16(Value.getZExtValue(), E); + return this->emitConstUint16(Value, E); case PT_Sint32: - return this->emitConstSint32(Value.getSExtValue(), E); + return this->emitConstSint32(Value, E); case PT_Uint32: - return this->emitConstUint32(Value.getZExtValue(), E); + return this->emitConstUint32(Value, E); case PT_Sint64: - return this->emitConstSint64(Value.getSExtValue(), E); + return this->emitConstSint64(Value, E); case PT_Uint64: - return this->emitConstUint64(Value.getZExtValue(), E); + return this->emitConstUint64(Value, E); case PT_Bool: - return this->emitConstBool(Value.getBoolValue(), E); + return this->emitConstBool(Value, E); case PT_Ptr: llvm_unreachable("Invalid integral type"); break; @@ -690,6 +739,13 @@ bool ByteCodeExprGen::emitConst(PrimType T, const APInt &Value, llvm_unreachable("unknown primitive type"); } +template +bool ByteCodeExprGen::emitConst(const APSInt &Value, const Expr *E) { + if (Value.isSigned()) + return this->emitConst(Value.getSExtValue(), E); + return this->emitConst(Value.getZExtValue(), E); +} + template unsigned ByteCodeExprGen::allocateLocalPrimitive(DeclTy &&Src, PrimType Ty, @@ -883,6 +939,9 @@ bool ByteCodeExprGen::visitRecordInitializer(const Expr *Initializer) { if (!this->emitInitField(*T, FieldToInit->Offset, Initializer)) return false; + + if (!this->emitPopPtr(Initializer)) + return false; } else { // Non-primitive case. Get a pointer to the field-to-initialize // on the stack and recurse into visitInitializer(). 
@@ -1142,7 +1201,7 @@ bool ByteCodeExprGen::VisitUnaryOperator(const UnaryOperator *E) { return this->emitIncPop(*T, E); this->emitLoad(*T, E); - this->emitConst(E, 1); + this->emitConst(1, E); this->emitAdd(*T, E); return this->emitStore(*T, E); } @@ -1155,7 +1214,7 @@ bool ByteCodeExprGen::VisitUnaryOperator(const UnaryOperator *E) { return this->emitDecPop(*T, E); this->emitLoad(*T, E); - this->emitConst(E, 1); + this->emitConst(1, E); this->emitSub(*T, E); return this->emitStore(*T, E); } @@ -1228,9 +1287,7 @@ bool ByteCodeExprGen::VisitDeclRefExpr(const DeclRefExpr *E) { return this->emitGetPtrParam(It->second, E); } } else if (const auto *ECD = dyn_cast(Decl)) { - PrimType T = *classify(ECD->getType()); - - return this->emitConst(T, ECD->getInitVal(), E); + return this->emitConst(ECD->getInitVal(), E); } return false; diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.h b/clang/lib/AST/Interp/ByteCodeExprGen.h index 4c7550aa497df..9b53065945173 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.h +++ b/clang/lib/AST/Interp/ByteCodeExprGen.h @@ -64,6 +64,7 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, bool VisitIntegerLiteral(const IntegerLiteral *E); bool VisitParenExpr(const ParenExpr *E); bool VisitBinaryOperator(const BinaryOperator *E); + bool VisitPointerArithBinOp(const BinaryOperator *E); bool VisitCXXDefaultArgExpr(const CXXDefaultArgExpr *E); bool VisitCallExpr(const CallExpr *E); bool VisitCXXMemberCallExpr(const CXXMemberCallExpr *E); @@ -228,16 +229,14 @@ class ByteCodeExprGen : public ConstStmtVisitor, bool>, DerefKind AK, llvm::function_ref Direct, llvm::function_ref Indirect); - /// Emits an APInt constant. - bool emitConst(PrimType T, const llvm::APInt &Value, const Expr *E); + /// Emits an APSInt constant. + bool emitConst(const APSInt &Value, const Expr *E); + bool emitConst(const APInt &Value, const Expr *E) { + return emitConst(static_cast(Value), E); + } /// Emits an integer constant. 
- template bool emitConst(const Expr *E, T Value) { - QualType Ty = E->getType(); - APInt WrappedValue(getIntWidth(Ty), static_cast(Value), - std::is_signed::value); - return emitConst(*Ctx.classify(Ty), WrappedValue, E); - } + template bool emitConst(T Value, const Expr *E); /// Emits the initialized pointer. bool emitInitFn() { diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp index bbe4d04c8974b..a6aa8d88622a0 100644 --- a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp @@ -114,6 +114,9 @@ bool ByteCodeStmtGen::visitFunc(const FunctionDecl *F) { if (!this->emitInitField(*T, F->Offset, InitExpr)) return false; + + if (!this->emitPopPtr(InitExpr)) + return false; } else { // Non-primitive case. Get a pointer to the field-to-initialize // on the stack and call visitInitialzer() for it. @@ -234,12 +237,11 @@ bool ByteCodeStmtGen::visitReturnStmt(const ReturnStmt *RS) { this->emitCleanup(); return this->emitRetVoid(RS); } - } else { - this->emitCleanup(); - if (!this->emitRetVoid(RS)) - return false; - return true; } + + // Void return. + this->emitCleanup(); + return this->emitRetVoid(RS); } template diff --git a/clang/lib/AST/Interp/Context.h b/clang/lib/AST/Interp/Context.h index 96e93dbfc48b0..feb809b69bf39 100644 --- a/clang/lib/AST/Interp/Context.h +++ b/clang/lib/AST/Interp/Context.h @@ -18,7 +18,6 @@ #include "InterpStack.h" #include "clang/AST/APValue.h" -#include "llvm/ADT/PointerIntPair.h" namespace clang { class ASTContext; @@ -69,7 +68,6 @@ class Context final { /// Checks a result from the interpreter. bool Check(State &Parent, llvm::Expected &&R); -private: /// Current compilation context. ASTContext &Ctx; /// Interpreter stack, shared across invocations. 
diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp index 55182ec383fa1..f645063acdd01 100644 --- a/clang/lib/AST/Interp/Descriptor.cpp +++ b/clang/lib/AST/Interp/Descriptor.cpp @@ -184,6 +184,7 @@ static BlockCtorFn getCtorArrayPrim(PrimType Type) { static BlockDtorFn getDtorArrayPrim(PrimType Type) { TYPE_SWITCH(Type, return dtorArrayTy); + llvm_unreachable("unknown Expr"); } static BlockMoveFn getMoveArrayPrim(PrimType Type) { diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h index dacec6be89c74..b2f50815fe848 100644 --- a/clang/lib/AST/Interp/Descriptor.h +++ b/clang/lib/AST/Interp/Descriptor.h @@ -113,15 +113,15 @@ struct Descriptor final { const Expr *asExpr() const { return Source.dyn_cast(); } const ValueDecl *asValueDecl() const { - return dyn_cast_or_null(asDecl()); + return dyn_cast_if_present(asDecl()); } const FieldDecl *asFieldDecl() const { - return dyn_cast_or_null(asDecl()); + return dyn_cast_if_present(asDecl()); } const RecordDecl *asRecordDecl() const { - return dyn_cast_or_null(asDecl()); + return dyn_cast_if_present(asDecl()); } /// Returns the size of the object without metadata. 
diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp index 82debe4fcae16..d31e879d516fb 100644 --- a/clang/lib/AST/Interp/Disasm.cpp +++ b/clang/lib/AST/Interp/Disasm.cpp @@ -22,7 +22,7 @@ using namespace clang; using namespace clang::interp; template inline T ReadArg(Program &P, CodePtr &OpPC) { - if constexpr (std::is_pointer::value) { + if constexpr (std::is_pointer_v) { uint32_t ID = OpPC.read(); return reinterpret_cast(P.getNativePointer(ID)); } else { diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index a5984a21efb19..b22756a803459 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -201,8 +201,8 @@ bool CheckArray(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, AccessKinds AK) { - const auto &Src = S.Current->getSource(OpPC); if (Ptr.isZero()) { + const auto &Src = S.Current->getSource(OpPC); if (Ptr.isField()) S.FFDiag(Src, diag::note_constexpr_null_subobject) << CSK_Field; @@ -213,6 +213,7 @@ bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, } if (!Ptr.isLive()) { + const auto &Src = S.Current->getSource(OpPC); bool IsTemp = Ptr.isTemporary(); S.FFDiag(Src, diag::note_constexpr_lifetime_ended, 1) << AK << !IsTemp; diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 23d7b600e1adb..a90e1246311d8 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -112,8 +112,6 @@ bool CheckDivRem(InterpState &S, CodePtr OpPC, const T &LHS, const T &RHS) { return true; } -template inline bool IsTrue(const T &V) { return !V.isZero(); } - /// Interpreter entry point. 
bool Interpret(InterpState &S, APValue &Result); @@ -466,6 +464,16 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { } else { unsigned VL = LHS.getByteOffset(); unsigned VR = RHS.getByteOffset(); + + // In our Pointer class, a pointer to an array and a pointer to the first + // element in the same array are NOT equal. They have the same Base value, + // but a different Offset. This is a pretty rare case, so we fix this here + // by comparing pointers to the first elements. + if (LHS.inArray() && LHS.isRoot()) + VL = LHS.atIndex(0).getByteOffset(); + if (RHS.inArray() && RHS.isRoot()) + VR = RHS.atIndex(0).getByteOffset(); + S.Stk.push(BoolT::from(Fn(Compare(VL, VR)))); return true; } @@ -725,12 +733,12 @@ bool InitThisFieldActive(InterpState &S, CodePtr OpPC, uint32_t I) { } /// 1) Pops the value from the stack -/// 2) Pops a pointer from the stack +/// 2) Peeks a pointer from the stack /// 3) Pushes the value to field I of the pointer on the stack template ::T> bool InitField(InterpState &S, CodePtr OpPC, uint32_t I) { const T &Value = S.Stk.pop(); - const Pointer &Field = S.Stk.pop().atField(I); + const Pointer &Field = S.Stk.peek().atField(I); Field.deref() = Value; Field.activate(); Field.initialize(); @@ -991,23 +999,25 @@ template bool OffsetHelper(InterpState &S, CodePtr OpPC) { // Fetch the pointer and the offset. const T &Offset = S.Stk.pop(); const Pointer &Ptr = S.Stk.pop(); - if (!CheckNull(S, OpPC, Ptr, CSK_ArrayIndex)) - return false; + if (!CheckRange(S, OpPC, Ptr, CSK_ArrayToPointer)) return false; - // Get a version of the index comparable to the type. - T Index = T::from(Ptr.getIndex(), Offset.bitWidth()); - // A zero offset does not change the pointer, but in the case of an array - // it has to be adjusted to point to the first element instead of the array. + // A zero offset does not change the pointer. if (Offset.isZero()) { - S.Stk.push(Index.isZero() ? 
Ptr.atIndex(0) : Ptr); + S.Stk.push(Ptr); return true; } + + if (!CheckNull(S, OpPC, Ptr, CSK_ArrayIndex)) + return false; + // Arrays of unknown bounds cannot have pointers into them. if (!CheckArray(S, OpPC, Ptr)) return false; + // Get a version of the index comparable to the type. + T Index = T::from(Ptr.getIndex(), Offset.bitWidth()); // Compute the largest index into the array. unsigned MaxIndex = Ptr.getNumElems(); @@ -1061,6 +1071,23 @@ bool SubOffset(InterpState &S, CodePtr OpPC) { return OffsetHelper(S, OpPC); } +/// 1) Pops a Pointer from the stack. +/// 2) Pops another Pointer from the stack. +/// 3) Pushes the different of the indices of the two pointers on the stack. +template ::T> +inline bool SubPtr(InterpState &S, CodePtr OpPC) { + const Pointer &LHS = S.Stk.pop(); + const Pointer &RHS = S.Stk.pop(); + + if (!Pointer::hasSameArray(LHS, RHS)) { + // TODO: Diagnose. + return false; + } + + T A = T::from(LHS.getIndex()); + T B = T::from(RHS.getIndex()); + return AddSubMulHelper(S, OpPC, A.bitWidth(), A, B); +} //===----------------------------------------------------------------------===// // Destroy diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td index 9f938a6440ae9..ebb0f49bfe59f 100644 --- a/clang/lib/AST/Interp/Opcodes.td +++ b/clang/lib/AST/Interp/Opcodes.td @@ -390,6 +390,12 @@ def AddOffset : AluOpcode; // [Pointer, Integral] -> [Pointer] def SubOffset : AluOpcode; +// Pointer, Pointer] - [Integral] +def SubPtr : Opcode { + let Types = [IntegerTypeClass]; + let HasGroup = 1; +} + //===----------------------------------------------------------------------===// // Binary operators. 
//===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp index c7d9c3a5cd11b..b849acb52f0c1 100644 --- a/clang/lib/AST/Interp/Pointer.cpp +++ b/clang/lib/AST/Interp/Pointer.cpp @@ -202,5 +202,5 @@ bool Pointer::hasSameBase(const Pointer &A, const Pointer &B) { } bool Pointer::hasSameArray(const Pointer &A, const Pointer &B) { - return A.Base == B.Base && A.getFieldDesc()->IsArray; + return hasSameBase(A, B) && A.Base == B.Base && A.getFieldDesc()->IsArray; } diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp index e0a4221db7ecf..88bb517e2280b 100644 --- a/clang/lib/AST/StmtOpenMP.cpp +++ b/clang/lib/AST/StmtOpenMP.cpp @@ -744,6 +744,21 @@ OMPTaskyieldDirective *OMPTaskyieldDirective::CreateEmpty(const ASTContext &C, return new (C) OMPTaskyieldDirective(); } +OMPErrorDirective *OMPErrorDirective::Create(const ASTContext &C, + SourceLocation StartLoc, + SourceLocation EndLoc, + ArrayRef Clauses) { + return createDirective( + C, Clauses, /*AssociatedStmt=*/nullptr, /*NumChildren=*/0, StartLoc, + EndLoc); +} + +OMPErrorDirective *OMPErrorDirective::CreateEmpty(const ASTContext &C, + unsigned NumClauses, + EmptyShell) { + return createEmptyDirective(C, NumClauses); +} + OMPBarrierDirective *OMPBarrierDirective::Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc) { diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 10263a418305e..50f6e087fb292 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -843,6 +843,11 @@ void StmtPrinter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *Node) { PrintOMPExecutableDirective(Node); } +void StmtPrinter::VisitOMPErrorDirective(OMPErrorDirective *Node) { + Indent() << "#pragma omp error"; + PrintOMPExecutableDirective(Node); +} + void StmtPrinter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *Node) { Indent() << "#pragma omp 
taskgroup"; PrintOMPExecutableDirective(Node); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index db04873128703..b15e9c2ac1a1a 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -1014,6 +1014,9 @@ void StmtProfiler::VisitOMPTaskwaitDirective(const OMPTaskwaitDirective *S) { VisitOMPExecutableDirective(S); } +void StmtProfiler::VisitOMPErrorDirective(const OMPErrorDirective *S) { + VisitOMPExecutableDirective(S); +} void StmtProfiler::VisitOMPTaskgroupDirective(const OMPTaskgroupDirective *S) { VisitOMPExecutableDirective(S); if (const Expr *E = S->getReductionRef()) diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index bd961a2404888..759426adcef70 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -49,109 +49,103 @@ using namespace clang; namespace { - /// RAII object that enables printing of the ARC __strong lifetime - /// qualifier. - class IncludeStrongLifetimeRAII { - PrintingPolicy &Policy; - bool Old; - - public: - explicit IncludeStrongLifetimeRAII(PrintingPolicy &Policy) - : Policy(Policy), Old(Policy.SuppressStrongLifetime) { - if (!Policy.SuppressLifetimeQualifiers) - Policy.SuppressStrongLifetime = false; - } +/// RAII object that enables printing of the ARC __strong lifetime +/// qualifier. 
+class IncludeStrongLifetimeRAII { + PrintingPolicy &Policy; + bool Old; + +public: + explicit IncludeStrongLifetimeRAII(PrintingPolicy &Policy) + : Policy(Policy), Old(Policy.SuppressStrongLifetime) { + if (!Policy.SuppressLifetimeQualifiers) + Policy.SuppressStrongLifetime = false; + } - ~IncludeStrongLifetimeRAII() { - Policy.SuppressStrongLifetime = Old; - } - }; + ~IncludeStrongLifetimeRAII() { Policy.SuppressStrongLifetime = Old; } +}; - class ParamPolicyRAII { - PrintingPolicy &Policy; - bool Old; +class ParamPolicyRAII { + PrintingPolicy &Policy; + bool Old; - public: - explicit ParamPolicyRAII(PrintingPolicy &Policy) - : Policy(Policy), Old(Policy.SuppressSpecifiers) { - Policy.SuppressSpecifiers = false; - } +public: + explicit ParamPolicyRAII(PrintingPolicy &Policy) + : Policy(Policy), Old(Policy.SuppressSpecifiers) { + Policy.SuppressSpecifiers = false; + } - ~ParamPolicyRAII() { - Policy.SuppressSpecifiers = Old; - } - }; + ~ParamPolicyRAII() { Policy.SuppressSpecifiers = Old; } +}; - class DefaultTemplateArgsPolicyRAII { - PrintingPolicy &Policy; - bool Old; +class DefaultTemplateArgsPolicyRAII { + PrintingPolicy &Policy; + bool Old; - public: - explicit DefaultTemplateArgsPolicyRAII(PrintingPolicy &Policy) - : Policy(Policy), Old(Policy.SuppressDefaultTemplateArgs) { - Policy.SuppressDefaultTemplateArgs = false; - } +public: + explicit DefaultTemplateArgsPolicyRAII(PrintingPolicy &Policy) + : Policy(Policy), Old(Policy.SuppressDefaultTemplateArgs) { + Policy.SuppressDefaultTemplateArgs = false; + } - ~DefaultTemplateArgsPolicyRAII() { - Policy.SuppressDefaultTemplateArgs = Old; - } - }; - - class ElaboratedTypePolicyRAII { - PrintingPolicy &Policy; - bool SuppressTagKeyword; - bool SuppressScope; - - public: - explicit ElaboratedTypePolicyRAII(PrintingPolicy &Policy) : Policy(Policy) { - SuppressTagKeyword = Policy.SuppressTagKeyword; - SuppressScope = Policy.SuppressScope; - Policy.SuppressTagKeyword = true; - Policy.SuppressScope = true; - } + 
~DefaultTemplateArgsPolicyRAII() { Policy.SuppressDefaultTemplateArgs = Old; } +}; - ~ElaboratedTypePolicyRAII() { - Policy.SuppressTagKeyword = SuppressTagKeyword; - Policy.SuppressScope = SuppressScope; - } - }; - - class TypePrinter { - PrintingPolicy Policy; - unsigned Indentation; - bool HasEmptyPlaceHolder = false; - bool InsideCCAttribute = false; - - public: - explicit TypePrinter(const PrintingPolicy &Policy, unsigned Indentation = 0) - : Policy(Policy), Indentation(Indentation) {} - - void print(const Type *ty, Qualifiers qs, raw_ostream &OS, - StringRef PlaceHolder); - void print(QualType T, raw_ostream &OS, StringRef PlaceHolder); - - static bool canPrefixQualifiers(const Type *T, bool &NeedARCStrongQualifier); - void spaceBeforePlaceHolder(raw_ostream &OS); - void printTypeSpec(NamedDecl *D, raw_ostream &OS); - void printTemplateId(const TemplateSpecializationType *T, raw_ostream &OS, - bool FullyQualify); - - void printBefore(QualType T, raw_ostream &OS); - void printAfter(QualType T, raw_ostream &OS); - void AppendScope(DeclContext *DC, raw_ostream &OS, - DeclarationName NameInScope); - void printTag(TagDecl *T, raw_ostream &OS); - void printFunctionAfter(const FunctionType::ExtInfo &Info, raw_ostream &OS); +class ElaboratedTypePolicyRAII { + PrintingPolicy &Policy; + bool SuppressTagKeyword; + bool SuppressScope; + +public: + explicit ElaboratedTypePolicyRAII(PrintingPolicy &Policy) : Policy(Policy) { + SuppressTagKeyword = Policy.SuppressTagKeyword; + SuppressScope = Policy.SuppressScope; + Policy.SuppressTagKeyword = true; + Policy.SuppressScope = true; + } + + ~ElaboratedTypePolicyRAII() { + Policy.SuppressTagKeyword = SuppressTagKeyword; + Policy.SuppressScope = SuppressScope; + } +}; + +class TypePrinter { + PrintingPolicy Policy; + unsigned Indentation; + bool HasEmptyPlaceHolder = false; + bool InsideCCAttribute = false; + +public: + explicit TypePrinter(const PrintingPolicy &Policy, unsigned Indentation = 0) + : Policy(Policy), 
Indentation(Indentation) {} + + void print(const Type *ty, Qualifiers qs, raw_ostream &OS, + StringRef PlaceHolder); + void print(QualType T, raw_ostream &OS, StringRef PlaceHolder); + + static bool canPrefixQualifiers(const Type *T, bool &NeedARCStrongQualifier); + void spaceBeforePlaceHolder(raw_ostream &OS); + void printTypeSpec(NamedDecl *D, raw_ostream &OS); + void printTemplateId(const TemplateSpecializationType *T, raw_ostream &OS, + bool FullyQualify); + + void printBefore(QualType T, raw_ostream &OS); + void printAfter(QualType T, raw_ostream &OS); + void AppendScope(DeclContext *DC, raw_ostream &OS, + DeclarationName NameInScope); + void printTag(TagDecl *T, raw_ostream &OS); + void printFunctionAfter(const FunctionType::ExtInfo &Info, raw_ostream &OS); #define ABSTRACT_TYPE(CLASS, PARENT) -#define TYPE(CLASS, PARENT) \ - void print##CLASS##Before(const CLASS##Type *T, raw_ostream &OS); \ - void print##CLASS##After(const CLASS##Type *T, raw_ostream &OS); +#define TYPE(CLASS, PARENT) \ + void print##CLASS##Before(const CLASS##Type *T, raw_ostream &OS); \ + void print##CLASS##After(const CLASS##Type *T, raw_ostream &OS); #include "clang/AST/TypeNodes.inc" - private: - void printBefore(const Type *ty, Qualifiers qs, raw_ostream &OS); - void printAfter(const Type *ty, Qualifiers qs, raw_ostream &OS); - }; +private: + void printBefore(const Type *ty, Qualifiers qs, raw_ostream &OS); + void printAfter(const Type *ty, Qualifiers qs, raw_ostream &OS); +}; } // namespace diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index 20c6c68e44a07..458de974e46bf 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -727,9 +727,9 @@ class CFGBuilder { // hence strict duck-typing. 
template ::value || - std::is_base_of::value || - std::is_base_of::value>> + std::is_base_of_v || + std::is_base_of_v || + std::is_base_of_v>> void findConstructionContextsForArguments(CallLikeExpr *E) { for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { Expr *Arg = E->getArg(i); diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 0b098c43ba3d1..ab1241d95eea0 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -295,8 +295,8 @@ bool Environment::equivalentTo(const Environment &Other, assert(It->second != nullptr); if (!areEquivalentValues(*Val, *It->second) && - !Model.compareEquivalent(Loc->getType(), *Val, *this, *It->second, - Other)) + Model.compare(Loc->getType(), *Val, *this, *It->second, Other) != + ComparisonResult::Same) return false; } diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index 1ffd88697f3a7..1a41cfaa5fa13 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -208,7 +208,7 @@ QualType stripReference(QualType Type) { } /// Returns true if and only if `Type` is an optional type. -bool IsOptionalType(QualType Type) { +bool isOptionalType(QualType Type) { if (!Type->isRecordType()) return false; // FIXME: Optimize this by avoiding the `getQualifiedNameAsString` call. @@ -222,7 +222,7 @@ bool IsOptionalType(QualType Type) { /// For example, if `Type` is `optional>`, the result of this /// function will be 2. 
int countOptionalWrappers(const ASTContext &ASTCtx, QualType Type) { - if (!IsOptionalType(Type)) + if (!isOptionalType(Type)) return 0; return 1 + countOptionalWrappers( ASTCtx, @@ -720,12 +720,14 @@ void UncheckedOptionalAccessModel::transfer(const CFGElement *Elt, TransferMatchSwitch(*Elt, getASTContext(), State); } -bool UncheckedOptionalAccessModel::compareEquivalent(QualType Type, - const Value &Val1, - const Environment &Env1, - const Value &Val2, - const Environment &Env2) { - return isNonEmptyOptional(Val1, Env1) == isNonEmptyOptional(Val2, Env2); +ComparisonResult UncheckedOptionalAccessModel::compare( + QualType Type, const Value &Val1, const Environment &Env1, + const Value &Val2, const Environment &Env2) { + if (!isOptionalType(Type)) + return ComparisonResult::Unknown; + return isNonEmptyOptional(Val1, Env1) == isNonEmptyOptional(Val2, Env2) + ? ComparisonResult::Same + : ComparisonResult::Different; } bool UncheckedOptionalAccessModel::merge(QualType Type, const Value &Val1, @@ -734,7 +736,7 @@ bool UncheckedOptionalAccessModel::merge(QualType Type, const Value &Val1, const Environment &Env2, Value &MergedVal, Environment &MergedEnv) { - if (!IsOptionalType(Type)) + if (!isOptionalType(Type)) return true; auto &HasValueVal = MergedEnv.makeAtomicBoolValue(); diff --git a/clang/lib/Analysis/RetainSummaryManager.cpp b/clang/lib/Analysis/RetainSummaryManager.cpp index 5e9c73534aeba..143c037dda9f6 100644 --- a/clang/lib/Analysis/RetainSummaryManager.cpp +++ b/clang/lib/Analysis/RetainSummaryManager.cpp @@ -32,7 +32,7 @@ constexpr static bool isOneOf() { /// rest of varargs. 
template constexpr static bool isOneOf() { - return std::is_same::value || isOneOf(); + return std::is_same_v || isOneOf(); } namespace { diff --git a/clang/lib/Basic/CMakeLists.txt b/clang/lib/Basic/CMakeLists.txt index 5d197f59ac4f7..f0f3839a7e2c3 100644 --- a/clang/lib/Basic/CMakeLists.txt +++ b/clang/lib/Basic/CMakeLists.txt @@ -111,3 +111,7 @@ add_clang_library(clangBasic omp_gen ) +target_link_libraries(clangBasic + PRIVATE + ${LLVM_ATOMIC_LIB} +) diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 2f2e6537ebd3e..7d74ec8701569 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -763,6 +763,7 @@ void clang::getOpenMPCaptureRegions( case OMPD_allocate: case OMPD_taskyield: case OMPD_barrier: + case OMPD_error: case OMPD_taskwait: case OMPD_cancellation_point: case OMPD_cancel: diff --git a/clang/lib/Basic/SourceLocation.cpp b/clang/lib/Basic/SourceLocation.cpp index 6e5e55fb09cef..f9ecd52e5f27b 100644 --- a/clang/lib/Basic/SourceLocation.cpp +++ b/clang/lib/Basic/SourceLocation.cpp @@ -42,11 +42,11 @@ void PrettyStackTraceLoc::print(raw_ostream &OS) const { // SourceLocation //===----------------------------------------------------------------------===// -static_assert(std::is_trivially_destructible::value, +static_assert(std::is_trivially_destructible_v, "SourceLocation must be trivially destructible because it is " "used in unions"); -static_assert(std::is_trivially_destructible::value, +static_assert(std::is_trivially_destructible_v, "SourceRange must be trivially destructible because it is " "used in unions"); diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index f2db186aac4cb..c38849058e13d 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -705,8 +705,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, // For bare-metal none-eabi. 
if (getTriple().getOS() == llvm::Triple::UnknownOS && (getTriple().getEnvironment() == llvm::Triple::EABI || - getTriple().getEnvironment() == llvm::Triple::EABIHF)) + getTriple().getEnvironment() == llvm::Triple::EABIHF)) { Builder.defineMacro("__ELF__"); + if (Opts.CPlusPlus) + Builder.defineMacro("_GNU_SOURCE"); + } // Target properties. Builder.defineMacro("__REGISTER_PREFIX__", ""); diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index 08da01602599f..f6db5b8aaf45d 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -252,10 +252,12 @@ RISCVTargetInfo::getVScaleRange(const LangOptions &LangOpts) const { return std::pair( LangOpts.VScaleMin ? LangOpts.VScaleMin : 1, LangOpts.VScaleMax); - if (unsigned MinVLen = ISAInfo->getMinVLen()) { + if (unsigned MinVLen = ISAInfo->getMinVLen(); + MinVLen >= llvm::RISCV::RVVBitsPerBlock) { unsigned MaxVLen = ISAInfo->getMaxVLen(); // RISCV::RVVBitsPerBlock is 64. - return std::pair(MinVLen/64, MaxVLen/64); + return std::make_pair(MinVLen / llvm::RISCV::RVVBitsPerBlock, + MaxVLen / llvm::RISCV::RVVBitsPerBlock); } return None; diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 2d3f3d10c5716..a33a6f06c0182 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -524,6 +524,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_Tigerlake: case CK_SapphireRapids: case CK_Alderlake: + case CK_Raptorlake: + case CK_Meteorlake: // FIXME: Historically, we defined this legacy name, it would be nice to // remove it at some point. We've never exposed fine-grained names for // recent primary x86 CPUs, and we should keep it that way. 
@@ -1194,6 +1196,7 @@ bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const { #define X86_VENDOR(ENUM, STRING) .Case(STRING, true) #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) #define X86_CPU_TYPE(ENUM, STR) .Case(STR, true) +#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) #define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true) #include "llvm/Support/X86TargetParser.def" .Default(false); @@ -1408,6 +1411,8 @@ Optional X86TargetInfo::getCPUCacheLineSize() const { case CK_Rocketlake: case CK_IcelakeServer: case CK_Alderlake: + case CK_Raptorlake: + case CK_Meteorlake: case CK_KNL: case CK_KNM: // K7 diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 71ab946018584..d4e6097f152fd 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -241,12 +241,16 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const override { - return true; + if (CPU == llvm::X86::CK_None || CPU >= llvm::X86::CK_PentiumPro) + return true; + return TargetInfo::checkCFProtectionReturnSupported(Diags); }; bool checkCFProtectionBranchSupported(DiagnosticsEngine &Diags) const override { - return true; + if (CPU == llvm::X86::CK_None || CPU >= llvm::X86::CK_PentiumPro) + return true; + return TargetInfo::checkCFProtectionBranchSupported(Diags); }; virtual bool validateOperandSize(const llvm::StringMap &FeatureMap, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 91dd0ec306ad5..f4d126fd7134e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -12941,6 +12941,8 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { .Case(ALIAS, {1u, static_cast(llvm::X86::ENUM)}) #define X86_CPU_TYPE(ENUM, STR) \ .Case(STR, {1u, static_cast(llvm::X86::ENUM)}) +#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \ + .Case(ALIAS, {2u, static_cast(llvm::X86::ENUM)}) #define 
X86_CPU_SUBTYPE(ENUM, STR) \ .Case(STR, {2u, static_cast(llvm::X86::ENUM)}) #include "llvm/Support/X86TargetParser.def" @@ -21465,7 +21467,7 @@ RValue CodeGenFunction::EmitIntelFPGAMemBuiltin(const CallExpr *E) { llvm::Value *Ann = EmitAnnotationCall(F, PtrVal, AnnotStr, SourceLocation()); - cast(Ann)->addFnAttr(llvm::Attribute::ReadNone); + cast(Ann)->setDoesNotAccessMemory(); return RValue::get(Ann); } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index adf2834325296..796ce34397afd 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2128,6 +2128,15 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // The NoBuiltinAttr attached to the target FunctionDecl. const NoBuiltinAttr *NBA = nullptr; + // Some ABIs may result in additional accesses to arguments that may + // otherwise not be present. + auto AddPotentialArgAccess = [&]() { + llvm::Attribute A = FuncAttrs.getAttribute(llvm::Attribute::Memory); + if (A.isValid()) + FuncAttrs.addMemoryAttr(A.getMemoryEffects() | + llvm::MemoryEffects::argMemOnly()); + }; + // Collect function IR attributes based on declaration-specific // information. // FIXME: handle sseregparm someday... @@ -2174,18 +2183,18 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, // 'const', 'pure' and 'noalias' attributed functions are also nounwind. if (TargetDecl->hasAttr()) { - FuncAttrs.addAttribute(llvm::Attribute::ReadNone); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::none()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // gcc specifies that 'const' functions have greater restrictions than // 'pure' functions, so they also cannot have infinite loops. 
FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr()) { - FuncAttrs.addAttribute(llvm::Attribute::ReadOnly); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // gcc specifies that 'pure' functions cannot have infinite loops. FuncAttrs.addAttribute(llvm::Attribute::WillReturn); } else if (TargetDecl->hasAttr()) { - FuncAttrs.addAttribute(llvm::Attribute::ArgMemOnly); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::argMemOnly()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } if (TargetDecl->hasAttr()) @@ -2363,8 +2372,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, case ABIArgInfo::InAlloca: case ABIArgInfo::Indirect: { // inalloca and sret disable readnone and readonly - FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); + AddPotentialArgAccess(); break; } @@ -2534,9 +2542,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, Attrs.addAlignmentAttr(Align.getQuantity()); // byval disables readnone and readonly. - FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); - + AddPotentialArgAccess(); break; } case ABIArgInfo::IndirectAliased: { @@ -2552,8 +2558,7 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, case ABIArgInfo::InAlloca: // inalloca disables readnone and readonly. 
- FuncAttrs.removeAttribute(llvm::Attribute::ReadOnly) - .removeAttribute(llvm::Attribute::ReadNone); + AddPotentialArgAccess(); continue; } diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 7a0b07bbe0c92..95551329ba6b1 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1384,6 +1384,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { return EmitOMPArraySectionExpr(cast(E)); case Expr::ExtVectorElementExprClass: return EmitExtVectorElementExpr(cast(E)); + case Expr::CXXThisExprClass: + return MakeAddrLValue(LoadCXXThisAddress(), E->getType()); case Expr::MemberExprClass: return EmitMemberExpr(cast(E)); case Expr::CompoundLiteralExprClass: @@ -5061,8 +5063,7 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { std::string NoBuiltinFD = ("no-builtin-" + FD->getName()).str(); std::string NoBuiltins = "no-builtins"; - auto *A = FD->getAttr(); - StringRef Ident = A ? A->getLabel() : FD->getName(); + StringRef Ident = CGF.CGM.getMangledName(GD); std::string FDInlineName = (Ident + ".inline").str(); bool IsPredefinedLibFunction = diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index c3b8e6e8afa91..92eadb39e6ded 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -737,14 +737,17 @@ class ObjCNonFragileABITypesHelper : public ObjCCommonTypesHelper { // Also it is safe to make it readnone, since we never load or store the // classref except by calling this function. 
llvm::Type *params[] = { Int8PtrPtrTy }; + llvm::LLVMContext &C = CGM.getLLVMContext(); + llvm::AttributeSet AS = llvm::AttributeSet::get(C, { + llvm::Attribute::get(C, llvm::Attribute::NonLazyBind), + llvm::Attribute::getWithMemoryEffects(C, llvm::MemoryEffects::none()), + llvm::Attribute::get(C, llvm::Attribute::NoUnwind), + }); llvm::FunctionCallee F = CGM.CreateRuntimeFunction( llvm::FunctionType::get(ClassnfABIPtrTy, params, false), "objc_loadClassref", llvm::AttributeList::get(CGM.getLLVMContext(), - llvm::AttributeList::FunctionIndex, - {llvm::Attribute::NonLazyBind, - llvm::Attribute::ReadNone, - llvm::Attribute::NoUnwind})); + llvm::AttributeList::FunctionIndex, AS)); if (!CGM.getTriple().isOSBinFormatCOFF()) cast(F.getCallee())->setLinkage( llvm::Function::ExternalWeakLinkage); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 122006f667ed2..b87e69b641a63 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1858,7 +1858,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, auto EntryInfo = getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName()); SmallString<128> Buffer, Out; - EntryInfo.getTargetRegionEntryFnName(Buffer); + OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); const Expr *Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { @@ -2950,194 +2950,57 @@ enum KmpTaskTFields { }; } // anonymous namespace -void CGOpenMPRuntime::createOffloadEntry( - llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage) { - OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags); -} - void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { - // Emit the offloading entries and metadata so that the device codegen side - // can easily figure out what to emit. 
The produced metadata looks like - // this: - // - // !omp_offload.info = !{!1, ...} - // - // Right now we only generate metadata for function that contain target - // regions. - // If we are in simd mode or there are no entries, we don't need to do // anything. if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) return; - llvm::Module &M = CGM.getModule(); - llvm::LLVMContext &C = M.getContext(); - SmallVector< - std::tuple, - 16> - OrderedEntries(OffloadEntriesInfoManager.size()); - llvm::SmallVector ParentFunctions( - OffloadEntriesInfoManager.size()); - - // Auxiliary methods to create metadata values and strings. - auto &&GetMDInt = [this](unsigned V) { - return llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(CGM.Int32Ty, V)); - }; - - auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; - - // Create the offloading info metadata node. - llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - - // Create function that emits metadata for each target region entry; - auto &&TargetRegionMetadataEmitter = - [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, - &GetMDString]( - const llvm::TargetRegionEntryInfo &EntryInfo, - const llvm::OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion - &E) { - // Generate metadata for target regions. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (0). - // - Entry 1 -> Device ID of the file where the entry was identified. - // - Entry 2 -> File ID of the file where the entry was identified. - // - Entry 3 -> Mangled name of the function where the entry was - // identified. - // - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Order the entry was created. - // The first element of the metadata node is the kind. 
- llvm::Metadata *Ops[] = { - GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID), - GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName), - GetMDInt(EntryInfo.Line), GetMDInt(E.getOrder())}; - - SourceLocation Loc; - for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), - E = CGM.getContext().getSourceManager().fileinfo_end(); - I != E; ++I) { - if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID && - I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) { - Loc = CGM.getContext().getSourceManager().translateFileLineCol( - I->getFirst(), EntryInfo.Line, 1); - break; - } - } - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = - std::make_tuple(&E, Loc, StringRef(EntryInfo.ParentName)); - ParentFunctions[E.getOrder()] = StringRef(EntryInfo.ParentName); - - // Add metadata to the named metadata node. - MD->addOperand(llvm::MDNode::get(C, Ops)); - }; - - OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( - TargetRegionMetadataEmitter); - - // Create function that emits metadata for each device global variable entry; - auto &&DeviceGlobalVarMetadataEmitter = - [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD]( - StringRef MangledName, - const llvm::OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar - &E) { - // Generate metadata for global variables. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (1). - // - Entry 1 -> Mangled name of the variable. - // - Entry 2 -> Declare target kind. - // - Entry 3 -> Order the entry was created. - // The first element of the metadata node is the kind. - llvm::Metadata *Ops[] = { - GetMDInt(E.getKind()), GetMDString(MangledName), - GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; - - // Save this entry in the right position of the ordered entries array. 
- OrderedEntries[E.getOrder()] = - std::make_tuple(&E, SourceLocation(), MangledName); - - // Add metadata to the named metadata node. - MD->addOperand(llvm::MDNode::get(C, Ops)); - }; - - OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( - DeviceGlobalVarMetadataEmitter); - - for (const auto &E : OrderedEntries) { - assert(std::get<0>(E) && "All ordered entries must exist!"); - if (const auto *CE = dyn_cast< - llvm::OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>( - std::get<0>(E))) { - if (!CE->getID() || !CE->getAddress()) { - // Do not blame the entry if the parent funtion is not emitted. - StringRef FnName = ParentFunctions[CE->getOrder()]; - if (!CGM.GetGlobalValue(FnName)) - continue; - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for target region in %0 is incorrect: either the " - "address or the ID is invalid."); - CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; - continue; - } - createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, - CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); - } else if (const auto *CE = dyn_cast( - std::get<0>(E))) { - llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags = - static_cast< - llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>( - CE->getFlags()); - switch (Flags) { - case llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo: { - if (CGM.getLangOpts().OpenMPIsDevice && - CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) - continue; - if (!CE->getAddress()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, "Offloading entry for declare target " - "variable %0 is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); - continue; - } - // The vaiable has no definition - no need to add the entry. 
- if (CE->getVarSize() == 0) - continue; - break; - } - case llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink: - assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || - (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && - "Declaret target link address is set."); - if (CGM.getLangOpts().OpenMPIsDevice) - continue; - if (!CE->getAddress()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for declare target variable is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(DiagID); - continue; + llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = + [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, + const llvm::TargetRegionEntryInfo &EntryInfo) -> void { + SourceLocation Loc; + if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) { + for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), + E = CGM.getContext().getSourceManager().fileinfo_end(); + I != E; ++I) { + if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID && + I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) { + Loc = CGM.getContext().getSourceManager().translateFileLineCol( + I->getFirst(), EntryInfo.Line, 1); + break; } - break; } - - // Hidden or internal symbols on the device are not externally visible. We - // should not attempt to register them by creating an offloading entry. 
- if (auto *GV = dyn_cast(CE->getAddress())) - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - continue; - - createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(), - Flags, CE->getLinkage()); - } else { - llvm_unreachable("Unsupported entry kind."); } - } + switch (Kind) { + case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Offloading entry for target region in " + "%0 is incorrect: either the " + "address or the ID is invalid."); + CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; + } break; + case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Offloading entry for declare target " + "variable %0 is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; + } break; + case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for declare target variable is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(DiagID); + } break; + } + }; + + OMPBuilder.createOffloadEntriesAndInfoMetadata( + OffloadEntriesInfoManager, isTargetCodegen(), + CGM.getLangOpts().OpenMPIsDevice, + CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory(), ErrorReportFn); } /// Loads all the offload entries information from the host IR @@ -3171,52 +3034,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { return; } - llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); - if (!MD) - return; - - for (llvm::MDNode *MN : MD->operands()) { - auto &&GetMDInt = [MN](unsigned Idx) { - auto *V = cast(MN->getOperand(Idx)); - return cast(V->getValue())->getZExtValue(); - }; - - auto &&GetMDString = [MN](unsigned Idx) { - auto *V = cast(MN->getOperand(Idx)); - return V->getString(); - }; - - switch 
(GetMDInt(0)) { - default: - llvm_unreachable("Unexpected metadata!"); - break; - case llvm::OffloadEntriesInfoManager::OffloadEntryInfo:: - OffloadingEntryInfoTargetRegion: { - assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " - "only required for the " - "device code generation."); - llvm::TargetRegionEntryInfo EntryInfo(/*ParentName=*/GetMDString(3), - /*DeviceID=*/GetMDInt(1), - /*FileID=*/GetMDInt(2), - /*Line=*/GetMDInt(4)); - OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( - EntryInfo, /*Order=*/GetMDInt(5)); - break; - } - case llvm::OffloadEntriesInfoManager::OffloadEntryInfo:: - OffloadingEntryInfoDeviceGlobalVar: - assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " - "only required for the " - "device code generation."); - OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( - /*MangledName=*/GetMDString(1), - static_cast< - llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>( - /*Flags=*/GetMDInt(2)), - /*Order=*/GetMDInt(3)); - break; - } - } + OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager); } void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { @@ -6283,18 +6101,20 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( // Create a unique name for the entry function using the source location // information of the current target region. The name will be something like: // - // __omp_offloading_DD_FFFF_PP_lBB + // __omp_offloading_DD_FFFF_PP_lBB[_CC] // // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the // mangled name of the function that encloses the target region and BB is the - // line number of the target region. + // line number of the target region. CC is a count added when more than one + // region is located at the same location. 
const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice || !CGM.getLangOpts().OpenMPOffloadMandatory; auto EntryInfo = getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName); + SmallString<64> EntryFnName; - EntryInfo.getTargetRegionEntryFnName(EntryFnName); + OffloadEntriesInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo); const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); @@ -8919,7 +8739,7 @@ class MappableExprsHandler { // If this declaration appears in a is_device_ptr clause we just have to // pass the pointer by value. If it is a reference to a declaration, we just // pass its value. - if (VD && DevPointersMap.count(VD)) { + if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) { CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.emplace_back(Arg, VD); CombinedInfo.Pointers.push_back(Arg); @@ -8932,30 +8752,6 @@ class MappableExprsHandler { CombinedInfo.Mappers.push_back(nullptr); return; } - if (VD && HasDevAddrsMap.count(VD)) { - auto I = HasDevAddrsMap.find(VD); - CombinedInfo.Exprs.push_back(VD); - Expr *E = nullptr; - for (auto &MCL : I->second) { - E = MCL.begin()->getAssociatedExpression(); - break; - } - llvm::Value *Ptr = nullptr; - if (E->isGLValue()) - Ptr = CGF.EmitLValue(E).getPointer(CGF); - else - Ptr = CGF.EmitScalarExpr(E); - CombinedInfo.BasePointers.emplace_back(Ptr, VD); - CombinedInfo.Pointers.push_back(Ptr); - CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, - /*isSigned=*/true)); - CombinedInfo.Types.push_back( - (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | - OMP_MAP_TARGET_PARAM); - CombinedInfo.Mappers.push_back(nullptr); - return; - } using MapData = std::tuple(Addr); - if (!Fn) - return; - - llvm::Module &M = CGM.getModule(); - llvm::LLVMContext &Ctx = CGM.getLLVMContext(); - - // Get "nvvm.annotations" metadata node. 
- llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); - - llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; - // Append metadata to nvvm.annotations. - MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); - - // Add a function attribute for the kernel. - Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel")); -} - void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index 9e8130966735a..214f5e3d618aa 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -64,12 +64,6 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { // Base class overrides. // - /// Creates offloading entry for the provided entry ID \a ID, - /// address \a Addr, size \a Size, and flags \a Flags. - void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, - uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage) override; - /// Emit outlined function specialized for the Fork-Join /// programming model for applicable target directives on the NVPTX device. /// \param D Directive to emit. @@ -169,6 +163,8 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM); void clear() override; + bool isTargetCodegen() const override { return true; }; + /// Declare generalized virtual functions which need to be defined /// by all specializations of OpenMPGPURuntime Targets like AMDGCN /// and NVPTX. 
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 30c955b3d43fd..9531b855780c7 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -254,6 +254,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::OMPTaskyieldDirectiveClass: EmitOMPTaskyieldDirective(cast(*S)); break; + case Stmt::OMPErrorDirectiveClass: + EmitOMPErrorDirective(cast(*S)); + break; case Stmt::OMPBarrierDirectiveClass: EmitOMPBarrierDirective(cast(*S)); break; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 40d84d754f9d3..4619be474b463 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1347,6 +1347,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( case OMPD_parallel_for_simd: case OMPD_task: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_taskgroup: @@ -5244,6 +5245,10 @@ void CodeGenFunction::EmitOMPTaskyieldDirective( CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getBeginLoc()); } +void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { + llvm_unreachable("CodeGen for 'omp error' is not supported yet."); +} + void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); } diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 26234f026f1f8..bfd0a03ba29a1 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -1128,6 +1128,8 @@ CodeGenAction::loadModule(MemoryBufferRef MBRef) { CompilerInstance &CI = getCompilerInstance(); SourceManager &SM = CI.getSourceManager(); + VMContext->setOpaquePointers(CI.getCodeGenOpts().OpaquePointers); + // For ThinLTO backend invocations, ensure that the context // merges types based on ODR identifiers. 
We also need to read // the correct module out of a multi-module bitcode file. diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index f6acb008831f6..6259aeae1bc05 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3520,6 +3520,7 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitOMPParallelMasterDirective(const OMPParallelMasterDirective &S); void EmitOMPTaskDirective(const OMPTaskDirective &S); void EmitOMPTaskyieldDirective(const OMPTaskyieldDirective &S); + void EmitOMPErrorDirective(const OMPErrorDirective &S); void EmitOMPBarrierDirective(const OMPBarrierDirective &S); void EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S); void EmitOMPTaskgroupDirective(const OMPTaskgroupDirective &S); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 44bae154c33fb..705b3d2363ae3 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1325,8 +1325,9 @@ static llvm::FunctionCallee getItaniumDynamicCastFn(CodeGenFunction &CGF) { llvm::FunctionType *FTy = llvm::FunctionType::get(Int8PtrTy, Args, false); // Mark the function as nounwind readonly. 
- llvm::Attribute::AttrKind FuncAttrs[] = { llvm::Attribute::NoUnwind, - llvm::Attribute::ReadOnly }; + llvm::AttrBuilder FuncAttrs(CGF.getLLVMContext()); + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly()); llvm::AttributeList Attrs = llvm::AttributeList::get( CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 1898667279cc3..87a0c5a585115 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -92,6 +92,7 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) { .Case("impish", Distro::UbuntuImpish) .Case("jammy", Distro::UbuntuJammy) .Case("kinetic", Distro::UbuntuKinetic) + .Case("lunar", Distro::UbuntuLunar) .Default(Distro::UnknownDistro); return Version; } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index c10c91b335c76..206aa34b14175 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1894,6 +1894,11 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename, return false; } +static const char BugReporMsg[] = + "\n********************\n\n" + "PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n" + "Preprocessed source(s) and associated run script(s) are located at:"; + // When clang crashes, produce diagnostic information including the fully // preprocessed source file(s). Request that the developer attach the // diagnostic information to a bug report. @@ -1949,6 +1954,29 @@ void Driver::generateCompilationDiagnostics( // Suppress tool output. C.initCompilationForDiagnostics(); + // If lld failed, rerun it again with --reproduce. 
+ if (IsLLD) { + const char *TmpName = CreateTempFile(C, "linker-crash", "tar"); + Command NewLLDInvocation = Cmd; + llvm::opt::ArgStringList ArgList = NewLLDInvocation.getArguments(); + StringRef ReproduceOption = + C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() + ? "/reproduce:" + : "--reproduce="; + ArgList.push_back(Saver.save(Twine(ReproduceOption) + TmpName).data()); + NewLLDInvocation.replaceArguments(std::move(ArgList)); + + // Redirect stdout/stderr to /dev/null. + NewLLDInvocation.Execute({None, {""}, {""}}, nullptr, nullptr); + Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; + Diag(clang::diag::note_drv_command_failed_diag_msg) << TmpName; + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "\n\n********************"; + if (Report) + Report->TemporaryFiles.push_back(TmpName); + return; + } + // Construct the list of inputs. InputList Inputs; BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs); @@ -2026,22 +2054,6 @@ void Driver::generateCompilationDiagnostics( return; } - // If lld failed, rerun it again with --reproduce. - if (IsLLD) { - const char *TmpName = CreateTempFile(C, "linker-crash", "tar"); - Command NewLLDInvocation = Cmd; - llvm::opt::ArgStringList ArgList = NewLLDInvocation.getArguments(); - StringRef ReproduceOption = - C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() - ? "/reproduce:" - : "--reproduce="; - ArgList.push_back(Saver.save(Twine(ReproduceOption) + TmpName).data()); - NewLLDInvocation.replaceArguments(std::move(ArgList)); - - // Redirect stdout/stderr to /dev/null. 
- NewLLDInvocation.Execute({None, {""}, {""}}, nullptr, nullptr); - } - const TempFileList &TempFiles = C.getTempFiles(); if (TempFiles.empty()) { Diag(clang::diag::note_drv_command_failed_diag_msg) @@ -2049,10 +2061,7 @@ void Driver::generateCompilationDiagnostics( return; } - Diag(clang::diag::note_drv_command_failed_diag_msg) - << "\n********************\n\n" - "PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n" - "Preprocessed source(s) and associated run script(s) are located at:"; + Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; SmallString<128> VFS; SmallString<128> ReproCrashFilename; diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index edbb7625aaf2f..a710081fa2d47 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -923,10 +923,14 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, !TC.getTriple().isOSBinFormatELF() || TC.getTriple().isOSFuchsia() || TC.getTriple().isPS()); + // Enable ODR indicators which allow better handling of mixed instrumented + // and uninstrumented globals. Disable them for Windows where weak odr + // indicators (.weak.__odr_asan_gen*) may cause multiple definition linker + // errors in the absence of -lldmingw. 
AsanUseOdrIndicator = Args.hasFlag(options::OPT_fsanitize_address_use_odr_indicator, options::OPT_fno_sanitize_address_use_odr_indicator, - AsanUseOdrIndicator); + !TC.getTriple().isOSWindows()); if (AllAddedKinds & SanitizerKind::PointerCompare & ~AllRemove) { AsanInvalidPointerCmp = true; @@ -1236,8 +1240,8 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, if (AsanGlobalsDeadStripping) CmdArgs.push_back("-fsanitize-address-globals-dead-stripping"); - if (AsanUseOdrIndicator) - CmdArgs.push_back("-fsanitize-address-use-odr-indicator"); + if (!AsanUseOdrIndicator) + CmdArgs.push_back("-fno-sanitize-address-use-odr-indicator"); if (AsanInvalidPointerCmp) { CmdArgs.push_back("-mllvm"); diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index c845e69c14e84..081e8ff4a168f 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Host.h" #include "llvm/Support/RISCVISAInfo.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" @@ -48,16 +49,12 @@ static bool getArchFeatures(const Driver &D, StringRef Arch, } // Get features except standard extension feature -static void getRISCFeaturesFromMcpu(const Driver &D, const llvm::Triple &Triple, - const llvm::opt::ArgList &Args, - const llvm::opt::Arg *A, StringRef Mcpu, +static bool getRISCFeaturesFromMcpu(const llvm::Triple &Triple, StringRef Mcpu, std::vector &Features) { bool Is64Bit = Triple.isRISCV64(); llvm::RISCV::CPUKind CPUKind = llvm::RISCV::parseCPUKind(Mcpu); - if (!llvm::RISCV::checkCPUKind(CPUKind, Is64Bit) || - !llvm::RISCV::getCPUFeaturesExceptStdExt(CPUKind, Features)) { - D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); - } + return llvm::RISCV::checkCPUKind(CPUKind, Is64Bit) && + 
llvm::RISCV::getCPUFeaturesExceptStdExt(CPUKind, Features); } void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, @@ -70,8 +67,14 @@ void riscv::getRISCVTargetFeatures(const Driver &D, const llvm::Triple &Triple, // If users give march and mcpu, get std extension feature from MArch // and other features (ex. mirco architecture feature) from mcpu - if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) - getRISCFeaturesFromMcpu(D, Triple, Args, A, A->getValue(), Features); + if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { + StringRef CPU = A->getValue(); + if (CPU == "native") + CPU = llvm::sys::getHostCPUName(); + if (!getRISCFeaturesFromMcpu(Triple, CPU, Features)) + D.Diag(clang::diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << CPU; + } // Handle features corresponding to "-ffixed-X" options if (Args.hasArg(options::OPT_ffixed_x1)) @@ -260,7 +263,10 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args, // 2. Get march (isa string) based on `-mcpu=` if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { - StringRef MArch = llvm::RISCV::getMArchFromMcpu(A->getValue()); + StringRef CPU = A->getValue(); + if (CPU == "native") + CPU = llvm::sys::getHostCPUName(); + StringRef MArch = llvm::RISCV::getMArchFromMcpu(CPU); // Bypass if target cpu's default march is empty. if (MArch != "") return MArch; @@ -299,3 +305,20 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args, return "rv64imafdc"; } } + +std::string riscv::getRISCVTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple) { + std::string CPU; + // If we have -mcpu, use that. + if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) + CPU = A->getValue(); + + // Handle CPU name is 'native'. + if (CPU == "native") + CPU = llvm::sys::getHostCPUName(); + + if (!CPU.empty()) + return CPU; + + return Triple.isRISCV64() ? 
"generic-rv64" : "generic-rv32"; +} diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.h b/clang/lib/Driver/ToolChains/Arch/RISCV.h index d4a519cdab340..c30f1098ddda5 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.h +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.h @@ -26,6 +26,8 @@ StringRef getRISCVABI(const llvm::opt::ArgList &Args, const llvm::Triple &Triple); StringRef getRISCVArch(const llvm::opt::ArgList &Args, const llvm::Triple &Triple); +std::string getRISCVTargetCPU(const llvm::opt::ArgList &Args, + const llvm::Triple &Triple); } // end namespace riscv } // namespace tools } // end namespace driver diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9a946fb9c259c..edd089620233d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2298,7 +2298,10 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args, if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { CmdArgs.push_back("-tune-cpu"); - CmdArgs.push_back(A->getValue()); + if (strcmp(A->getValue(), "native") == 0) + CmdArgs.push_back(Args.MakeArgString(llvm::sys::getHostCPUName())); + else + CmdArgs.push_back(A->getValue()); } } @@ -6281,6 +6284,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ); + if (getLastProfileSampleUseArg(Args) && + Args.hasArg(options::OPT_fsample_profile_use_profi)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-sample-profile-use-profi"); + } + // Add runtime flag for PS4/PS5 when PGO, coverage, or sanitizers are enabled. if (RawTriple.isPS() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { @@ -7568,18 +7577,21 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Forward -Xclang arguments to -cc1, and -mllvm arguments to the LLVM option // parser. 
- // -finclude-default-header flag is for preprocessor, - // do not pass it to other cc1 commands when save-temps is enabled - if (C.getDriver().isSaveTempsEnabled() && - !isa(JA)) { - for (auto *Arg : Args.filtered(options::OPT_Xclang)) { - Arg->claim(); - if (StringRef(Arg->getValue()) != "-finclude-default-header") - CmdArgs.push_back(Arg->getValue()); + for (auto Arg : Args.filtered(options::OPT_Xclang)) { + Arg->claim(); + // -finclude-default-header flag is for preprocessor, + // do not pass it to other cc1 commands when save-temps is enabled + if (C.getDriver().isSaveTempsEnabled() && + !isa(JA)) { + if (StringRef(Arg->getValue()) == "-finclude-default-header") + continue; } - } - else { - Args.AddAllArgValues(CmdArgs, options::OPT_Xclang); + if (StringRef(Arg->getValue()) == "-fexperimental-assignment-tracking") { + // Add the llvm version of this flag too. + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-experimental-assignment-tracking"); + } + CmdArgs.push_back(Arg->getValue()); } for (const Arg *A : Args.filtered(options::OPT_mllvm)) { A->claim(); @@ -7806,15 +7818,29 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (SplitLTOUnit) CmdArgs.push_back("-fsplit-lto-unit"); - if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel, - options::OPT_fno_global_isel)) { + A = Args.getLastArg(options::OPT_fglobal_isel, options::OPT_fno_global_isel); + // If a configuration is fully supported, we don't issue any warnings or + // remarks. 
+ bool IsFullySupported = getToolChain().getTriple().isOSDarwin() && + Triple.getArch() == llvm::Triple::aarch64; + if (IsFullySupported) { + if (A && A->getOption().matches(options::OPT_fno_global_isel)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-global-isel=0"); + } else { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-global-isel=1"); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-global-isel-abort=0"); + } + } else if (A) { CmdArgs.push_back("-mllvm"); if (A->getOption().matches(options::OPT_fglobal_isel)) { CmdArgs.push_back("-global-isel=1"); // GISel is on by default on AArch64 -O0, so don't bother adding // the fallback remarks for it. Other combinations will add a warning of - // some kind. + // some kind, unless we're on Darwin. bool IsArchSupported = Triple.getArch() == llvm::Triple::aarch64; bool IsOptLevelSupported = false; diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 3a332b2867b28..bde130f3ba3f6 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -12,6 +12,7 @@ #include "Arch/M68k.h" #include "Arch/Mips.h" #include "Arch/PPC.h" +#include "Arch/RISCV.h" #include "Arch/Sparc.h" #include "Arch/SystemZ.h" #include "Arch/VE.h" @@ -440,9 +441,7 @@ std::string tools::getCPUName(const Driver &D, const ArgList &Args, return "ck810"; case llvm::Triple::riscv32: case llvm::Triple::riscv64: - if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) - return A->getValue(); - return ""; + return riscv::getRISCVTargetCPU(Args, T); case llvm::Triple::bpfel: case llvm::Triple::bpfeb: diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 39f459e9ef652..661764e6eb00b 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -381,10 +381,11 @@ void darwin::Linker::AddLinkArgs(Compilation &C, const ArgList &Args, 
D.Diag(diag::err_drv_bitcode_unsupported_on_toolchain); } - // If GlobalISel is enabled, pass it through to LLVM. - if (Arg *A = Args.getLastArg(options::OPT_fglobal_isel, - options::OPT_fno_global_isel)) { - if (A->getOption().matches(options::OPT_fglobal_isel)) { + // GlobalISel is enabled by default on AArch64 Darwin. + if (getToolChain().getArch() == llvm::Triple::aarch64) { + Arg *A = Args.getLastArg(options::OPT_fglobal_isel, + options::OPT_fno_global_isel); + if (!A || !A->getOption().matches(options::OPT_fno_global_isel)) { CmdArgs.push_back("-mllvm"); CmdArgs.push_back("-global-isel"); // Disable abort and fall back to SDAG silently. diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 6c6895da61299..43f6a82c33c49 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -83,6 +83,12 @@ void Flang::AddPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { static void addFloatingPointOptions(const Driver &D, const ArgList &Args, ArgStringList &CmdArgs) { StringRef FPContract; + bool HonorINFs = true; + bool HonorNaNs = true; + bool ApproxFunc = false; + bool SignedZeros = true; + bool AssociativeMath = false; + bool ReciprocalMath = false; if (const Arg *A = Args.getLastArg(options::OPT_ffp_contract)) { const StringRef Val = A->getValue(); @@ -101,8 +107,75 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, << A->getOption().getName() << Val; } + for (const Arg *A : Args) { + auto optId = A->getOption().getID(); + switch (optId) { + // if this isn't an FP option, skip the claim below + default: + continue; + + case options::OPT_fhonor_infinities: + HonorINFs = true; + break; + case options::OPT_fno_honor_infinities: + HonorINFs = false; + break; + case options::OPT_fhonor_nans: + HonorNaNs = true; + break; + case options::OPT_fno_honor_nans: + HonorNaNs = false; + break; + case options::OPT_fapprox_func: + ApproxFunc = true; + break; + case 
options::OPT_fno_approx_func: + ApproxFunc = false; + break; + case options::OPT_fsigned_zeros: + SignedZeros = true; + break; + case options::OPT_fno_signed_zeros: + SignedZeros = false; + break; + case options::OPT_fassociative_math: + AssociativeMath = true; + break; + case options::OPT_fno_associative_math: + AssociativeMath = false; + break; + case options::OPT_freciprocal_math: + ReciprocalMath = true; + break; + case options::OPT_fno_reciprocal_math: + ReciprocalMath = false; + break; + } + + // If we handled this option claim it + A->claim(); + } + if (!FPContract.empty()) CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract)); + + if (!HonorINFs) + CmdArgs.push_back("-menable-no-infs"); + + if (!HonorNaNs) + CmdArgs.push_back("-menable-no-nans"); + + if (ApproxFunc) + CmdArgs.push_back("-fapprox-func"); + + if (!SignedZeros) + CmdArgs.push_back("-fno-signed-zeros"); + + if (AssociativeMath && !SignedZeros) + CmdArgs.push_back("-mreassociate"); + + if (ReciprocalMath) + CmdArgs.push_back("-freciprocal-math"); } void Flang::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 988ecd2defa9c..641f1ae812a58 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -559,6 +559,7 @@ void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, Object Relationship; Relationship["source"] = Source.USR; Relationship["target"] = Target.USR; + Relationship["targetFallback"] = Target.Name; Relationship["kind"] = getRelationshipString(Kind); Relationships.emplace_back(std::move(Relationship)); diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index f1b5d184963ce..3fa3e6bcbb569 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ 
b/clang/lib/Format/ContinuationIndenter.cpp @@ -331,6 +331,15 @@ bool ContinuationIndenter::canBreak(const LineState &State) { if (Previous.is(tok::l_square) && Previous.is(TT_ObjCMethodExpr)) return false; + if (Current.is(TT_ConditionalExpr) && Previous.is(tok::r_paren) && + Previous.MatchingParen && Previous.MatchingParen->Previous && + Previous.MatchingParen->Previous->MatchingParen && + Previous.MatchingParen->Previous->MatchingParen->is(TT_LambdaLBrace)) { + // We have a lambda within a conditional expression, allow breaking here. + assert(Previous.MatchingParen->Previous->is(tok::r_brace)); + return true; + } + return !CurrentState.NoLineBreak; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 4901c5ce71066..dbfe88c531322 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -362,7 +362,8 @@ class AnnotatingParser { FormatToken *Next = CurrentToken->Next; if (PrevPrev && PrevPrev->is(tok::identifier) && Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && - CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { + CurrentToken->is(tok::identifier) && + !Next->isOneOf(tok::equal, tok::l_brace)) { Prev->setType(TT_BinaryOperator); LookForDecls = false; } @@ -1989,7 +1990,9 @@ class AnnotatingParser { } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept, tok::kw_requires) && Current.Previous && - !Current.Previous->isOneOf(tok::equal, tok::at) && + !Current.Previous->isOneOf(tok::equal, tok::at, + TT_CtorInitializerComma, + TT_CtorInitializerColon) && Line.MightBeFunctionDecl && Contexts.size() == 1) { // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. 
@@ -2385,6 +2388,12 @@ class AnnotatingParser { return TT_PointerOrReference; } + // if (Class* obj { function() }) + if (PrevToken->Tok.isAnyIdentifier() && NextToken->Tok.isAnyIdentifier() && + NextToken->Next && NextToken->Next->is(tok::l_brace)) { + return TT_PointerOrReference; + } + if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete)) return TT_UnaryOperator; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 0372b89397db9..18ec0844db3d4 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1108,12 +1108,10 @@ void UnwrappedLineParser::parsePPDirective() { parsePPIf(/*IfDef=*/true); break; case tok::pp_else: - parsePPElse(); - break; case tok::pp_elifdef: case tok::pp_elifndef: case tok::pp_elif: - parsePPElIf(); + parsePPElse(); break; case tok::pp_endif: parsePPEndIf(); @@ -1144,12 +1142,10 @@ void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { ++PPBranchLevel; assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { - // If the first branch is unreachable, set the BranchIndex to 1. This way - // the next branch will be parsed if there is one. - PPLevelBranchIndex.push_back(Unreachable ? 1 : 0); + PPLevelBranchIndex.push_back(0); PPLevelBranchCount.push_back(0); } - PPChainBranchIndex.push(0); + PPChainBranchIndex.push(Unreachable ? 
-1 : 0); bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; conditionalCompilationCondition(Unreachable || Skip); } @@ -1225,8 +1221,6 @@ void UnwrappedLineParser::parsePPElse() { ++PPBranchLevel; } -void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } - void UnwrappedLineParser::parsePPEndIf() { conditionalCompilationEnd(); parsePPUnknown(); diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index b9b106bcc89a4..34f211c9ebb35 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -119,7 +119,6 @@ class UnwrappedLineParser { void parsePPDirective(); void parsePPDefine(); void parsePPIf(bool IfDef); - void parsePPElIf(); void parsePPElse(); void parsePPEndIf(); void parsePPPragma(); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 44a8b1f758db9..92664cce304d4 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -199,8 +199,7 @@ static void denormalizeSimpleFlag(SmallVectorImpl &Args, } template static constexpr bool is_uint64_t_convertible() { - return !std::is_same::value && - llvm::is_integral_or_enum::value; + return !std::is_same_v && llvm::is_integral_or_enum::value; } template @@ -2262,7 +2262,7 @@ extern __inline __m128d #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* EMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h index 65920917f3bdc..7c1e625e44d51 100644 --- a/clang/lib/Headers/ppc_wrappers/mm_malloc.h +++ b/clang/lib/Headers/ppc_wrappers/mm_malloc.h @@ -10,7 +10,7 @@ #ifndef _MM_MALLOC_H_INCLUDED #define _MM_MALLOC_H_INCLUDED -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include diff --git 
a/clang/lib/Headers/ppc_wrappers/mmintrin.h b/clang/lib/Headers/ppc_wrappers/mmintrin.h index 70e8b81e11ee6..0be3af2b0bd72 100644 --- a/clang/lib/Headers/ppc_wrappers/mmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/mmintrin.h @@ -35,7 +35,7 @@ #ifndef _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include @@ -1447,7 +1447,7 @@ extern __inline __m64 #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/clang/lib/Headers/ppc_wrappers/pmmintrin.h b/clang/lib/Headers/ppc_wrappers/pmmintrin.h index fda39edbaa223..db128192abfb4 100644 --- a/clang/lib/Headers/ppc_wrappers/pmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/pmmintrin.h @@ -39,7 +39,7 @@ #ifndef PMMINTRIN_H_ #define PMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) /* We need definitions from the SSE2 and SSE header files*/ @@ -139,7 +139,7 @@ extern __inline __m128i #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* PMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h index 6fe6c8a93d9ba..6fe6d2a157a59 100644 --- a/clang/lib/Headers/ppc_wrappers/smmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h @@ -29,7 +29,7 @@ #ifndef SMMINTRIN_H_ #define SMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include @@ -657,7 +657,7 @@ extern __inline __m128i #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || 
defined(__FreeBSD__) || defined(_AIX)) */ #endif /* SMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/tmmintrin.h b/clang/lib/Headers/ppc_wrappers/tmmintrin.h index 6185ca1e7e710..92f08676d2dfa 100644 --- a/clang/lib/Headers/ppc_wrappers/tmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/tmmintrin.h @@ -25,7 +25,7 @@ #ifndef TMMINTRIN_H_ #define TMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) #include @@ -447,7 +447,7 @@ extern __inline __m64 #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* TMMINTRIN_H_ */ diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h index ee0032ca159cb..9dd21b65c2f70 100644 --- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h @@ -35,7 +35,7 @@ #ifndef XMMINTRIN_H_ #define XMMINTRIN_H_ -#if defined(__ppc64__) && \ +#if defined(__powerpc64__) && \ (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) /* Define four value permute mask */ @@ -1821,7 +1821,7 @@ extern __inline void #else #include_next -#endif /* defined(__ppc64__) && +#endif /* defined(__powerpc64__) && \ * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */ #endif /* XMMINTRIN_H_ */ diff --git a/clang/lib/Lex/MacroArgs.cpp b/clang/lib/Lex/MacroArgs.cpp index 7ede00b4aa642..1a2fd665fbd09 100644 --- a/clang/lib/Lex/MacroArgs.cpp +++ b/clang/lib/Lex/MacroArgs.cpp @@ -62,7 +62,7 @@ MacroArgs *MacroArgs::create(const MacroInfo *MI, // Copy the actual unexpanded tokens to immediately after the result ptr. 
if (!UnexpArgTokens.empty()) { - static_assert(std::is_trivial::value, + static_assert(std::is_trivial_v, "assume trivial copyability if copying into the " "uninitialized array (as opposed to reusing a cached " "MacroArgs)"); @@ -94,7 +94,7 @@ MacroArgs *MacroArgs::deallocate() { // Run the dtor to deallocate the vectors. this->~MacroArgs(); // Release the memory for the object. - static_assert(std::is_trivially_destructible::value, + static_assert(std::is_trivially_destructible_v, "assume trivially destructible and forego destructors"); free(this); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 6fcc782d7bbb2..5502d49791912 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1447,6 +1447,11 @@ static bool isTargetEnvironment(const TargetInfo &TI, const IdentifierInfo *II) { std::string EnvName = (llvm::Twine("---") + II->getName().lower()).str(); llvm::Triple Env(EnvName); + // The unknown environment is matched only if + // '__is_target_environment(unknown)' is used. + if (Env.getEnvironment() == llvm::Triple::UnknownEnvironment && + EnvName != "---unknown") + return false; return TI.getTriple().getEnvironment() == Env.getEnvironment(); } diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index e34bd8d7bca40..a768c4da504af 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1291,7 +1291,22 @@ ExprResult Parser::ParseLambdaExpressionAfterIntroducer( if (getLangOpts().CUDA) { // In CUDA code, GNU attributes are allowed to appear immediately after the // "[...]", even if there is no "(...)" before the lambda body. - MaybeParseGNUAttributes(D); + // + // Note that we support __noinline__ as a keyword in this mode and thus + // it has to be separately handled. 
+ while (true) { + if (Tok.is(tok::kw___noinline__)) { + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + Attr.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0, + ParsedAttr::AS_Keyword); + } else if (Tok.is(tok::kw___attribute)) + ParseGNUAttributes(Attr, nullptr, &D); + else + break; + } + + D.takeAttributes(Attr); } // Helper to emit a warning if we see a CUDA host/device/global attribute diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 6f8a467b9a657..7f48b16f97d61 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -2310,6 +2310,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( case OMPD_unroll: case OMPD_task: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_taskgroup: @@ -2410,8 +2411,8 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl( /// annot_pragma_openmp 'parallel' | 'simd' | 'for' | 'sections' | /// 'section' | 'single' | 'master' | 'critical' [ '(' ')' ] | /// 'parallel for' | 'parallel sections' | 'parallel master' | 'task' | -/// 'taskyield' | 'barrier' | 'taskwait' | 'flush' | 'ordered' | -/// 'atomic' | 'for simd' | 'parallel for simd' | 'target' | 'target +/// 'taskyield' | 'barrier' | 'taskwait' | 'flush' | 'ordered' | 'error' +/// | 'atomic' | 'for simd' | 'parallel for simd' | 'target' | 'target /// data' | 'taskgroup' | 'teams' | 'taskloop' | 'taskloop simd' | /// 'master taskloop' | 'master taskloop simd' | 'parallel master /// taskloop' | 'parallel master taskloop simd' | 'distribute' | 'target @@ -2697,6 +2698,7 @@ StmtResult Parser::ParseOpenMPDeclarativeOrExecutableDirective( case OMPD_depobj: case OMPD_scan: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp 
b/clang/lib/Sema/HLSLExternalSemaSource.cpp index f1b90fa971866..7459d9ce82455 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -175,9 +175,11 @@ struct BuiltinTypeDeclBuilder { Expr *Call = CallExpr::Create(AST, Fn, {RCExpr}, AST.VoidPtrTy, VK_PRValue, SourceLocation(), FPOptionsOverride()); - CXXThisExpr *This = new (AST) - CXXThisExpr(SourceLocation(), Constructor->getThisType(), true); - Expr *Handle = MemberExpr::CreateImplicit(AST, This, true, Fields["h"], + CXXThisExpr *This = new (AST) CXXThisExpr( + SourceLocation(), + Constructor->getThisType().getTypePtr()->getPointeeType(), true); + This->setValueKind(ExprValueKind::VK_LValue); + Expr *Handle = MemberExpr::CreateImplicit(AST, This, false, Fields["h"], Fields["h"]->getType(), VK_LValue, OK_Ordinary); @@ -260,10 +262,12 @@ struct BuiltinTypeDeclBuilder { auto FnProtoLoc = TSInfo->getTypeLoc().getAs(); FnProtoLoc.setParam(0, IdxParam); - auto *This = new (AST) - CXXThisExpr(SourceLocation(), MethodDecl->getThisType(), true); + auto *This = new (AST) CXXThisExpr( + SourceLocation(), + MethodDecl->getThisType().getTypePtr()->getPointeeType(), true); + This->setValueKind(ExprValueKind::VK_LValue); auto *HandleAccess = MemberExpr::CreateImplicit( - AST, This, true, Handle, Handle->getType(), VK_LValue, OK_Ordinary); + AST, This, false, Handle, Handle->getType(), VK_LValue, OK_Ordinary); auto *IndexExpr = DeclRefExpr::Create( AST, NestedNameSpecifierLoc(), SourceLocation(), IdxParam, false, diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 504085a2a36e3..3ff52285683e4 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -202,7 +202,7 @@ static unsigned getNumAttributeArgs(const ParsedAttr &AL) { /// A helper function to provide Attribute Location for the Attr types /// AND the ParsedAttr. 
template -static std::enable_if_t::value, SourceLocation> +static std::enable_if_t, SourceLocation> getAttrLoc(const AttrInfo &AL) { return AL.getLocation(); } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 1509751580122..4c15bac663cfe 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -102,24 +102,31 @@ bool CheckDefaultArgumentVisitor::VisitDeclRefExpr(const DeclRefExpr *DRE) { return S.Diag(DRE->getBeginLoc(), diag::err_param_default_argument_references_param) << Param->getDeclName() << DefaultArg->getSourceRange(); - } else if (const auto *VDecl = dyn_cast(Decl)) { - // C++ [dcl.fct.default]p7: - // Local variables shall not be used in default argument - // expressions. - // - // C++17 [dcl.fct.default]p7 (by CWG 2082): - // A local variable shall not appear as a potentially-evaluated - // expression in a default argument. - // - // C++20 [dcl.fct.default]p7 (DR as part of P0588R1, see also CWG 2346): - // Note: A local variable cannot be odr-used (6.3) in a default argument. - // - if (VDecl->isLocalVarDecl() && !DRE->isNonOdrUse()) - return S.Diag(DRE->getBeginLoc(), - diag::err_param_default_argument_references_local) - << VDecl->getDeclName() << DefaultArg->getSourceRange(); + } else { + const VarDecl *VD = nullptr; + if (const auto *BD = dyn_cast(Decl)) + VD = dyn_cast_if_present(BD->getDecomposedDecl()); + else + VD = dyn_cast(Decl); + if (VD) { + // C++ [dcl.fct.default]p7: + // Local variables shall not be used in default argument + // expressions. + // + // C++17 [dcl.fct.default]p7 (by CWG 2082): + // A local variable shall not appear as a potentially-evaluated + // expression in a default argument. + // + // C++20 [dcl.fct.default]p7 (DR as part of P0588R1, see also CWG 2346): + // Note: A local variable cannot be odr-used (6.3) in a default + // argument. 
+ // + if (VD->isLocalVarDecl() && !DRE->isNonOdrUse()) + return S.Diag(DRE->getBeginLoc(), + diag::err_param_default_argument_references_local) + << Decl->getDeclName() << DefaultArg->getSourceRange(); + } } - return false; } @@ -149,13 +156,20 @@ bool CheckDefaultArgumentVisitor::VisitPseudoObjectExpr( } bool CheckDefaultArgumentVisitor::VisitLambdaExpr(const LambdaExpr *Lambda) { - // C++11 [expr.lambda.prim]p13: - // A lambda-expression appearing in a default argument shall not - // implicitly or explicitly capture any entity. - if (Lambda->capture_begin() == Lambda->capture_end()) - return false; - - return S.Diag(Lambda->getBeginLoc(), diag::err_lambda_capture_default_arg); + // [expr.prim.lambda.capture]p9 + // a lambda-expression appearing in a default argument cannot implicitly or + // explicitly capture any local entity. Such a lambda-expression can still + // have an init-capture if any full-expression in its initializer satisfies + // the constraints of an expression appearing in a default argument. + bool Invalid = false; + for (const LambdaCapture &LC : Lambda->captures()) { + if (!Lambda->isInitCapture(&LC)) + return S.Diag(LC.getLocation(), diag::err_lambda_capture_default_arg); + // Init captures are always VarDecl. + auto *D = cast(LC.getCapturedVar()); + Invalid |= Visit(D->getInit()); + } + return Invalid; } } // namespace @@ -14713,7 +14727,8 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, MemberBuilder From(OtherRef, OtherRefType, /*IsArrow=*/false, MemberLookup); - MemberBuilder To(This, getCurrentThisType(), /*IsArrow=*/true, MemberLookup); + MemberBuilder To(This, getCurrentThisType(), /*IsArrow=*/!LangOpts.HLSL, + MemberLookup); // Build the copy of this field. 
StmtResult Copy = buildSingleCopyAssign(*this, Loc, FieldType, @@ -14731,9 +14746,16 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, if (!Invalid) { // Add a "return *this;" - ExprResult ThisObj = CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc)); + Expr *ThisExpr = nullptr; + if (!LangOpts.HLSL) { + ExprResult ThisObj = + CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc)); + ThisExpr = ThisObj.get(); + } else { + ThisExpr = This.build(*this, Loc); + } - StmtResult Return = BuildReturnStmt(Loc, ThisObj.get()); + StmtResult Return = BuildReturnStmt(Loc, ThisExpr); if (Return.isInvalid()) Invalid = true; else diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 8e302dd9427bc..2185f898668eb 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1498,6 +1498,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::OMPTaskLoopSimdDirectiveClass: case Stmt::OMPTaskwaitDirectiveClass: case Stmt::OMPTaskyieldDirectiveClass: + case Stmt::OMPErrorDirectiveClass: case Stmt::OMPTeamsDirectiveClass: case Stmt::OMPTeamsDistributeDirectiveClass: case Stmt::OMPTeamsDistributeParallelForDirectiveClass: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e00a6612daa4c..0a5a26573da43 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9327,7 +9327,8 @@ static bool IsInvalidCmseNSCallConversion(Sema &S, QualType FromType, // This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3]. // FIXME: add a couple examples in this comment. 
static Sema::AssignConvertType -checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { +checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType, + SourceLocation Loc) { assert(LHSType.isCanonical() && "LHS not canonicalized!"); assert(RHSType.isCanonical() && "RHS not canonicalized!"); @@ -9396,6 +9397,13 @@ checkPointerTypesForAssignment(Sema &S, QualType LHSType, QualType RHSType) { return Sema::FunctionVoidPointer; } + if (!S.Diags.isIgnored( + diag::warn_typecheck_convert_incompatible_function_pointer_strict, + Loc) && + RHSType->isFunctionPointerType() && LHSType->isFunctionPointerType() && + !S.IsFunctionConversion(RHSType, LHSType, RHSType)) + return Sema::IncompatibleFunctionPointerStrict; + // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or // unqualified versions of compatible types, ... QualType ltrans = QualType(lhptee, 0), rtrans = QualType(rhptee, 0); @@ -9747,7 +9755,8 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, Kind = CK_NoOp; else Kind = CK_BitCast; - return checkPointerTypesForAssignment(*this, LHSType, RHSType); + return checkPointerTypesForAssignment(*this, LHSType, RHSType, + RHS.get()->getBeginLoc()); } // int -> T* @@ -15674,7 +15683,7 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, } } - if (getLangOpts().HLSL) { + if (getLangOpts().HLSL && OpLoc.isValid()) { if (Opc == UO_AddrOf) return ExprError(Diag(OpLoc, diag::err_hlsl_operator_unsupported) << 0); if (Opc == UO_Deref) @@ -17036,6 +17045,12 @@ bool Sema::DiagnoseAssignmentResult(AssignConvertType ConvTy, ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); MayHaveConvFixit = true; break; + case IncompatibleFunctionPointerStrict: + DiagKind = + diag::warn_typecheck_convert_incompatible_function_pointer_strict; + ConvHints.tryToFixConversion(SrcExpr, SrcType, DstType, *this); + MayHaveConvFixit = true; + break; case IncompatibleFunctionPointer: if 
(getLangOpts().CPlusPlus) { DiagKind = diag::err_typecheck_convert_incompatible_function_pointer; diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 81a25107c888d..6e9db6da197ea 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1395,6 +1395,13 @@ ExprResult Sema::ActOnCXXThis(SourceLocation Loc) { Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type, bool IsImplicit) { + if (getLangOpts().HLSL && Type.getTypePtr()->isPointerType()) { + auto *This = new (Context) + CXXThisExpr(Loc, Type.getTypePtr()->getPointeeType(), IsImplicit); + This->setValueKind(ExprValueKind::VK_LValue); + MarkThisReferenced(This); + return This; + } auto *This = new (Context) CXXThisExpr(Loc, Type, IsImplicit); MarkThisReferenced(This); return This; diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index fc68b526e30ca..8eeed1a29dfc7 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -1903,6 +1903,14 @@ Sema::BuildImplicitMemberExpr(const CXXScopeSpec &SS, if (SS.getRange().isValid()) Loc = SS.getRange().getBegin(); baseExpr = BuildCXXThisExpr(loc, ThisTy, /*IsImplicit=*/true); + if (getLangOpts().HLSL && ThisTy.getTypePtr()->isPointerType()) { + ThisTy = ThisTy.getTypePtr()->getPointeeType(); + return BuildMemberReferenceExpr(baseExpr, ThisTy, + /*OpLoc*/ SourceLocation(), + /*IsArrow*/ false, SS, TemplateKWLoc, + /*FirstQualifierInScope*/ nullptr, R, + TemplateArgs, S); + } } return BuildMemberReferenceExpr(baseExpr, ThisTy, diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index d92aacd0df133..bd9e08a10479f 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -7597,15 +7597,15 @@ static SourceRange nextPathEntryRange(const IndirectLocalPath &Path, unsigned I, } static bool pathOnlyInitializesGslPointer(IndirectLocalPath &Path) { - for (auto It = Path.rbegin(), End = Path.rend(); It != End; ++It) { 
- if (It->Kind == IndirectLocalPathEntry::VarInit) + for (const auto &It : llvm::reverse(Path)) { + if (It.Kind == IndirectLocalPathEntry::VarInit) continue; - if (It->Kind == IndirectLocalPathEntry::AddressOf) + if (It.Kind == IndirectLocalPathEntry::AddressOf) continue; - if (It->Kind == IndirectLocalPathEntry::LifetimeBoundCall) + if (It.Kind == IndirectLocalPathEntry::LifetimeBoundCall) continue; - return It->Kind == IndirectLocalPathEntry::GslPointerInit || - It->Kind == IndirectLocalPathEntry::GslReferenceInit; + return It.Kind == IndirectLocalPathEntry::GslPointerInit || + It.Kind == IndirectLocalPathEntry::GslReferenceInit; } return false; } diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 621987a18825f..61f4f464063b3 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -992,11 +992,9 @@ bool Sema::LookupBuiltin(LookupResult &R) { // If this is a builtin on this (or all) targets, create the decl. if (unsigned BuiltinID = II->getBuiltinID()) { - // In C++, C2x, and OpenCL (spec v1.2 s6.9.f), we don't have any - // predefined library functions like 'malloc'. Instead, we'll just - // error. - if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL || - getLangOpts().C2x) && + // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined + // library functions like 'malloc'. Instead, we'll just error. + if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL) && Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID)) return false; diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 4b01f109fc881..1db716e77e7d5 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -144,6 +144,36 @@ void Sema::HandleStartOfHeaderUnit() { TU->setLocalOwningModule(Mod); } +/// Tests whether the given identifier is reserved as a module name and +/// diagnoses if it is. Returns true if a diagnostic is emitted and false +/// otherwise. 
+static bool DiagReservedModuleName(Sema &S, const IdentifierInfo *II, + SourceLocation Loc) { + enum { + Valid = -1, + Invalid = 0, + Reserved = 1, + } Reason = Valid; + + if (II->isStr("module") || II->isStr("import")) + Reason = Invalid; + else if (II->isReserved(S.getLangOpts()) != + ReservedIdentifierStatus::NotReserved) + Reason = Reserved; + + // If the identifier is reserved (not invalid) but is in a system header, + // we do not diagnose (because we expect system headers to use reserved + // identifiers). + if (Reason == Reserved && S.getSourceManager().isInSystemHeader(Loc)) + Reason = Valid; + + if (Reason != Valid) { + S.Diag(Loc, diag::err_invalid_module_name) << II << (int)Reason; + return true; + } + return false; +} + Sema::DeclGroupPtrTy Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, ModuleDeclKind MDK, ModuleIdPath Path, @@ -238,6 +268,32 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, } } + // C++2b [module.unit]p1: ... The identifiers module and import shall not + // appear as identifiers in a module-name or module-partition. All + // module-names either beginning with an identifier consisting of std + // followed by zero or more digits or containing a reserved identifier + // ([lex.name]) are reserved and shall not be specified in a + // module-declaration; no diagnostic is required. + + // Test the first part of the path to see if it's std[0-9]* but allow the + // name in a system header. 
+ StringRef FirstComponentName = Path[0].first->getName(); + if (!getSourceManager().isInSystemHeader(Path[0].second) && + (FirstComponentName == "std" || + (FirstComponentName.startswith("std") && + llvm::all_of(FirstComponentName.drop_front(3), &llvm::isDigit)))) { + Diag(Path[0].second, diag::err_invalid_module_name) + << Path[0].first << /*reserved*/ 1; + return nullptr; + } + + // Then test all of the components in the path to see if any of them are + // using another kind of reserved or invalid identifier. + for (auto Part : Path) { + if (DiagReservedModuleName(*this, Part.first, Part.second)) + return nullptr; + } + // Flatten the dots in a module name. Unlike Clang's hierarchical module map // modules, the dots here are just another character that can appear in a // module name. diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 67908f4e20be3..05d7db513ea5b 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4534,6 +4534,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -6307,6 +6308,11 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( "No associated statement allowed for 'omp taskyield' directive"); Res = ActOnOpenMPTaskyieldDirective(StartLoc, EndLoc); break; + case OMPD_error: + assert(AStmt == nullptr && + "No associated statement allowed for 'omp error' directive"); + Res = ActOnOpenMPErrorDirective(ClausesWithImplicit, StartLoc, EndLoc); + break; case OMPD_barrier: assert(ClausesWithImplicit.empty() && "No clauses are allowed for 'omp barrier' directive"); @@ -11022,6 +11028,12 @@ StmtResult Sema::ActOnOpenMPBarrierDirective(SourceLocation StartLoc, return OMPBarrierDirective::Create(Context, StartLoc, EndLoc); } +StmtResult Sema::ActOnOpenMPErrorDirective(ArrayRef Clauses, + SourceLocation 
StartLoc, + SourceLocation EndLoc) { + return OMPErrorDirective::Create(Context, StartLoc, EndLoc, Clauses); +} + StmtResult Sema::ActOnOpenMPTaskwaitDirective(ArrayRef Clauses, SourceLocation StartLoc, SourceLocation EndLoc) { @@ -15315,6 +15327,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15403,6 +15416,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15499,6 +15513,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15590,6 +15605,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15678,6 +15694,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15769,6 +15786,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15863,6 +15881,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -15954,6 +15973,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case 
OMPD_threadprivate: case OMPD_allocate: case OMPD_taskyield: + case OMPD_error: case OMPD_barrier: case OMPD_taskwait: case OMPD_cancellation_point: @@ -16567,9 +16587,8 @@ getListOfPossibleValues(OpenMPClauseKind K, unsigned First, unsigned Last, SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); unsigned Skipped = Exclude.size(); - auto S = Exclude.begin(), E = Exclude.end(); for (unsigned I = First; I < Last; ++I) { - if (std::find(S, E, I) != E) { + if (llvm::is_contained(Exclude, I)) { --Skipped; continue; } diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 17c0e2f04f4bd..6d57cd8542d6d 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -1128,9 +1128,7 @@ DeduceTemplateArguments(Sema &S, // During partial ordering, if Ai was originally a function parameter pack: // - if P does not contain a function parameter type corresponding to Ai then // Ai is ignored; - bool ClangABICompat15 = S.Context.getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver15; - if (!ClangABICompat15 && PartialOrdering && ArgIdx + 1 == NumArgs && + if (PartialOrdering && ArgIdx + 1 == NumArgs && isa(Args[ArgIdx])) return Sema::TDK_Success; @@ -2466,9 +2464,6 @@ static bool isSameTemplateArg(ASTContext &Context, if (X.getKind() != Y.getKind()) return false; - bool ClangABICompat15 = - Context.getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver15; - switch (X.getKind()) { case TemplateArgument::Null: llvm_unreachable("Comparing NULL template argument"); @@ -2500,45 +2495,33 @@ static bool isSameTemplateArg(ASTContext &Context, return XID == YID; } - case TemplateArgument::Pack: - if (ClangABICompat15) { - if (X.pack_size() != Y.pack_size()) + case TemplateArgument::Pack: { + unsigned PackIterationSize = X.pack_size(); + if (X.pack_size() != Y.pack_size()) { + if (!PartialOrdering) return false; - for (TemplateArgument::pack_iterator XP = 
X.pack_begin(), - XPEnd = X.pack_end(), - YP = Y.pack_begin(); - XP != XPEnd; ++XP, ++YP) - if (!isSameTemplateArg(Context, *XP, *YP, PartialOrdering, - PackExpansionMatchesPack)) - return false; - } else { - unsigned PackIterationSize = X.pack_size(); - if (X.pack_size() != Y.pack_size()) { - if (!PartialOrdering) - return false; - - // C++0x [temp.deduct.type]p9: - // During partial ordering, if Ai was originally a pack expansion: - // - if P does not contain a template argument corresponding to Ai - // then Ai is ignored; - bool XHasMoreArg = X.pack_size() > Y.pack_size(); - if (!(XHasMoreArg && X.pack_elements().back().isPackExpansion()) && - !(!XHasMoreArg && Y.pack_elements().back().isPackExpansion())) - return false; - - if (XHasMoreArg) - PackIterationSize = Y.pack_size(); - } + // C++0x [temp.deduct.type]p9: + // During partial ordering, if Ai was originally a pack expansion: + // - if P does not contain a template argument corresponding to Ai + // then Ai is ignored; + bool XHasMoreArg = X.pack_size() > Y.pack_size(); + if (!(XHasMoreArg && X.pack_elements().back().isPackExpansion()) && + !(!XHasMoreArg && Y.pack_elements().back().isPackExpansion())) + return false; - ArrayRef XP = X.pack_elements(); - ArrayRef YP = Y.pack_elements(); - for (unsigned i = 0; i < PackIterationSize; ++i) - if (!isSameTemplateArg(Context, XP[i], YP[i], PartialOrdering, - PackExpansionMatchesPack)) - return false; + if (XHasMoreArg) + PackIterationSize = Y.pack_size(); } + + ArrayRef XP = X.pack_elements(); + ArrayRef YP = Y.pack_elements(); + for (unsigned i = 0; i < PackIterationSize; ++i) + if (!isSameTemplateArg(Context, XP[i], YP[i], PartialOrdering, + PackExpansionMatchesPack)) + return false; return true; + } } llvm_unreachable("Invalid TemplateArgument Kind!"); @@ -5245,34 +5228,30 @@ FunctionTemplateDecl *Sema::getMoreSpecializedTemplate( // This a speculative fix for CWG1432 (Similar to the fix for CWG1395) that // there is no wording or even resolution for this 
issue. - bool ClangABICompat15 = - Context.getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver15; - if (!ClangABICompat15) { - for (int i = 0, e = std::min(NumParams1, NumParams2); i < e; ++i) { - QualType T1 = FD1->getParamDecl(i)->getType().getCanonicalType(); - QualType T2 = FD2->getParamDecl(i)->getType().getCanonicalType(); - auto *TST1 = dyn_cast(T1); - auto *TST2 = dyn_cast(T2); - if (!TST1 || !TST2) - continue; - const TemplateArgument &TA1 = TST1->template_arguments().back(); - if (TA1.getKind() == TemplateArgument::Pack) { - assert(TST1->template_arguments().size() == - TST2->template_arguments().size()); - const TemplateArgument &TA2 = TST2->template_arguments().back(); - assert(TA2.getKind() == TemplateArgument::Pack); - unsigned PackSize1 = TA1.pack_size(); - unsigned PackSize2 = TA2.pack_size(); - bool IsPackExpansion1 = - PackSize1 && TA1.pack_elements().back().isPackExpansion(); - bool IsPackExpansion2 = - PackSize2 && TA2.pack_elements().back().isPackExpansion(); - if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { - if (PackSize1 > PackSize2 && IsPackExpansion1) - return FT2; - if (PackSize1 < PackSize2 && IsPackExpansion2) - return FT1; - } + for (int i = 0, e = std::min(NumParams1, NumParams2); i < e; ++i) { + QualType T1 = FD1->getParamDecl(i)->getType().getCanonicalType(); + QualType T2 = FD2->getParamDecl(i)->getType().getCanonicalType(); + auto *TST1 = dyn_cast(T1); + auto *TST2 = dyn_cast(T2); + if (!TST1 || !TST2) + continue; + const TemplateArgument &TA1 = TST1->template_arguments().back(); + if (TA1.getKind() == TemplateArgument::Pack) { + assert(TST1->template_arguments().size() == + TST2->template_arguments().size()); + const TemplateArgument &TA2 = TST2->template_arguments().back(); + assert(TA2.getKind() == TemplateArgument::Pack); + unsigned PackSize1 = TA1.pack_size(); + unsigned PackSize2 = TA2.pack_size(); + bool IsPackExpansion1 = + PackSize1 && TA1.pack_elements().back().isPackExpansion(); + bool 
IsPackExpansion2 = + PackSize2 && TA2.pack_elements().back().isPackExpansion(); + if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { + if (PackSize1 > PackSize2 && IsPackExpansion1) + return FT2; + if (PackSize1 < PackSize2 && IsPackExpansion2) + return FT1; } } } @@ -5512,12 +5491,12 @@ namespace { // specialized than primary" check. struct GetP2 { template ::value, bool> = true> + std::enable_if_t, bool> = true> T2 *operator()(T1 *, T2 *P2) { return P2; } template ::value, bool> = true> + std::enable_if_t, bool> = true> T1 *operator()(T1 *, T2 *) { return nullptr; } @@ -5529,7 +5508,7 @@ struct TemplateArgumentListAreEqual { TemplateArgumentListAreEqual(ASTContext &Ctx) : Ctx(Ctx) {} template ::value, bool> = true> + std::enable_if_t, bool> = true> bool operator()(T1 *PS1, T2 *PS2) { ArrayRef Args1 = PS1->getTemplateArgs().asArray(), Args2 = PS2->getTemplateArgs().asArray(); @@ -5548,7 +5527,7 @@ struct TemplateArgumentListAreEqual { } template ::value, bool> = true> + std::enable_if_t, bool> = true> bool operator()(T1 *Spec, T2 *Primary) { ArrayRef Args1 = Spec->getTemplateArgs().asArray(), Args2 = Primary->getInjectedTemplateArgs(); @@ -5597,7 +5576,7 @@ static TemplateLikeDecl * getMoreSpecialized(Sema &S, QualType T1, QualType T2, TemplateLikeDecl *P1, PrimaryDel *P2, TemplateDeductionInfo &Info) { constexpr bool IsMoreSpecialThanPrimaryCheck = - !std::is_same::value; + !std::is_same_v; bool Better1 = isAtLeastAsSpecializedAs(S, T1, T2, P2, Info); if (IsMoreSpecialThanPrimaryCheck && !Better1) @@ -5618,29 +5597,25 @@ getMoreSpecialized(Sema &S, QualType T1, QualType T2, TemplateLikeDecl *P1, // This a speculative fix for CWG1432 (Similar to the fix for CWG1395) that // there is no wording or even resolution for this issue. 
- bool ClangABICompat15 = S.Context.getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver15; - if (!ClangABICompat15) { - auto *TST1 = cast(T1); - auto *TST2 = cast(T2); - const TemplateArgument &TA1 = TST1->template_arguments().back(); - if (TA1.getKind() == TemplateArgument::Pack) { - assert(TST1->template_arguments().size() == - TST2->template_arguments().size()); - const TemplateArgument &TA2 = TST2->template_arguments().back(); - assert(TA2.getKind() == TemplateArgument::Pack); - unsigned PackSize1 = TA1.pack_size(); - unsigned PackSize2 = TA2.pack_size(); - bool IsPackExpansion1 = - PackSize1 && TA1.pack_elements().back().isPackExpansion(); - bool IsPackExpansion2 = - PackSize2 && TA2.pack_elements().back().isPackExpansion(); - if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { - if (PackSize1 > PackSize2 && IsPackExpansion1) - return GetP2()(P1, P2); - if (PackSize1 < PackSize2 && IsPackExpansion2) - return P1; - } + auto *TST1 = cast(T1); + auto *TST2 = cast(T2); + const TemplateArgument &TA1 = TST1->template_arguments().back(); + if (TA1.getKind() == TemplateArgument::Pack) { + assert(TST1->template_arguments().size() == + TST2->template_arguments().size()); + const TemplateArgument &TA2 = TST2->template_arguments().back(); + assert(TA2.getKind() == TemplateArgument::Pack); + unsigned PackSize1 = TA1.pack_size(); + unsigned PackSize2 = TA2.pack_size(); + bool IsPackExpansion1 = + PackSize1 && TA1.pack_elements().back().isPackExpansion(); + bool IsPackExpansion2 = + PackSize2 && TA2.pack_elements().back().isPackExpansion(); + if (PackSize1 != PackSize2 && IsPackExpansion1 != IsPackExpansion2) { + if (PackSize1 > PackSize2 && IsPackExpansion1) + return GetP2()(P1, P2); + if (PackSize1 < PackSize2 && IsPackExpansion2) + return P1; } } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 2924be98212cc..30c060c219e9b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ 
-8930,6 +8930,17 @@ TreeTransform::TransformOMPTaskwaitDirective(OMPTaskwaitDirective *D) { return Res; } +template +StmtResult +TreeTransform::TransformOMPErrorDirective(OMPErrorDirective *D) { + DeclarationNameInfo DirName; + getDerived().getSema().StartOpenMPDSABlock(OMPD_error, DirName, nullptr, + D->getBeginLoc()); + StmtResult Res = getDerived().TransformOMPExecutableDirective(D); + getDerived().getSema().EndOpenMPDSABlock(Res.get()); + return Res; +} + template StmtResult TreeTransform::TransformOMPTaskgroupDirective( OMPTaskgroupDirective *D) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f6049bb988681..ca9c68c65b284 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -8235,8 +8235,8 @@ namespace serialization { /// Add the given set of methods to the method list. static void addMethodsToPool(Sema &S, ArrayRef Methods, ObjCMethodList &List) { - for (auto I = Methods.rbegin(), E = Methods.rend(); I != E; ++I) - S.addMethodToGlobalList(&List, *I); + for (ObjCMethodDecl *M : llvm::reverse(Methods)) + S.addMethodToGlobalList(&List, M); } void ASTReader::ReadMethodPool(Selector Sel) { diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index a96727854c895..494d75ffda8fb 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2444,6 +2444,13 @@ void ASTStmtReader::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) { VisitOMPExecutableDirective(D); } +void ASTStmtReader::VisitOMPErrorDirective(OMPErrorDirective *D) { + VisitStmt(D); + // The NumClauses field was read in ReadStmtFromStream. 
+ Record.skipInts(1); + VisitOMPExecutableDirective(D); +} + void ASTStmtReader::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) { VisitStmt(D); VisitOMPExecutableDirective(D); @@ -3395,6 +3402,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { Context, Record[ASTStmtReader::NumStmtFields], Empty); break; + case STMT_OMP_ERROR_DIRECTIVE: + S = OMPErrorDirective::CreateEmpty( + Context, Record[ASTStmtReader::NumStmtFields], Empty); + break; + case STMT_OMP_TASKGROUP_DIRECTIVE: S = OMPTaskgroupDirective::CreateEmpty( Context, Record[ASTStmtReader::NumStmtFields], Empty); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3658d69908c3c..1dcb5426b314d 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4334,7 +4334,8 @@ void ASTRecordWriter::AddAttr(const Attr *A) { // FIXME: Clang can't handle the serialization/deserialization of // preferred_name properly now. See // https://github.com/llvm/llvm-project/issues/56490 for example. 
- if (!A || (isa(A) && Writer->isWritingNamedModules())) + if (!A || (isa(A) && + Writer->isWritingStdCXXNamedModules())) return Record.push_back(0); Record.push_back(A->getKind() + 1); // FIXME: stable encoding, target attrs diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index c0b001e2267e1..6c30dbfa4644d 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2433,6 +2433,13 @@ void ASTStmtWriter::VisitOMPTaskwaitDirective(OMPTaskwaitDirective *D) { Code = serialization::STMT_OMP_TASKWAIT_DIRECTIVE; } +void ASTStmtWriter::VisitOMPErrorDirective(OMPErrorDirective *D) { + VisitStmt(D); + Record.push_back(D->getNumClauses()); + VisitOMPExecutableDirective(D); + Code = serialization::STMT_OMP_ERROR_DIRECTIVE; +} + void ASTStmtWriter::VisitOMPTaskgroupDirective(OMPTaskgroupDirective *D) { VisitStmt(D); VisitOMPExecutableDirective(D); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index bb31642e7b0c2..e4fe409469067 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -912,9 +912,9 @@ static void printStateTraitWithLocationContextJson( // Try to do as much compile time checking as possible. // FIXME: check for invocable instead of function? 
- static_assert(std::is_function>::value, + static_assert(std::is_function_v>, "Printer is not a function!"); - static_assert(std::is_convertible::value, + static_assert(std::is_convertible_v, "Printer doesn't have the required type!"); if (LCtx && !State->get().isEmpty()) { @@ -1744,6 +1744,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OMPTaskyieldDirectiveClass: case Stmt::OMPBarrierDirectiveClass: case Stmt::OMPTaskwaitDirectiveClass: + case Stmt::OMPErrorDirectiveClass: case Stmt::OMPTaskgroupDirectiveClass: case Stmt::OMPFlushDirectiveClass: case Stmt::OMPDepobjDirectiveClass: diff --git a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp index 0821863adcc6e..6b359c1910bca 100644 --- a/clang/lib/Tooling/ASTDiff/ASTDiff.cpp +++ b/clang/lib/Tooling/ASTDiff/ASTDiff.cpp @@ -117,13 +117,11 @@ class SyntaxTree::Impl { Impl(SyntaxTree *Parent, Stmt *N, ASTContext &AST); template Impl(SyntaxTree *Parent, - std::enable_if_t::value, T> *Node, - ASTContext &AST) + std::enable_if_t, T> *Node, ASTContext &AST) : Impl(Parent, dyn_cast(Node), AST) {} template Impl(SyntaxTree *Parent, - std::enable_if_t::value, T> *Node, - ASTContext &AST) + std::enable_if_t, T> *Node, ASTContext &AST) : Impl(Parent, dyn_cast(Node), AST) {} SyntaxTree *Parent; diff --git a/clang/test/AST/HLSL/RWBuffer-AST.hlsl b/clang/test/AST/HLSL/RWBuffer-AST.hlsl index 0929462e51831..9dd9244b73eed 100644 --- a/clang/test/AST/HLSL/RWBuffer-AST.hlsl +++ b/clang/test/AST/HLSL/RWBuffer-AST.hlsl @@ -46,8 +46,8 @@ RWBuffer Buffer; // CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <> // CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <> // CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type' lvalue -// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type *' lvalue ->h 0x{{[0-9A-Fa-f]+}} -// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'const RWBuffer *' implicit this +// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type *' lvalue .h 
0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'const RWBuffer' lvalue implicit this // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int' // CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline @@ -56,8 +56,8 @@ RWBuffer Buffer; // CHECK-NEXT: CompoundStmt 0x{{[0-9A-Fa-f]+}} <> // CHECK-NEXT: ReturnStmt 0x{{[0-9A-Fa-f]+}} <> // CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type' lvalue -// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type *' lvalue ->h 0x{{[0-9A-Fa-f]+}} -// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'RWBuffer *' implicit this +// CHECK-NEXT: MemberExpr 0x{{[0-9A-Fa-f]+}} <> 'element_type *' lvalue .h 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT: CXXThisExpr 0x{{[0-9A-Fa-f]+}} <> 'RWBuffer' lvalue implicit this // CHECK-NEXT: DeclRefExpr 0x{{[0-9A-Fa-f]+}} <> 'unsigned int' ParmVar 0x{{[0-9A-Fa-f]+}} 'Idx' 'unsigned int' // CHECK-NEXT: AlwaysInlineAttr 0x{{[0-9A-Fa-f]+}} <> Implicit always_inline diff --git a/clang/test/AST/HLSL/this-reference-template.hlsl b/clang/test/AST/HLSL/this-reference-template.hlsl new file mode 100644 index 0000000000000..c27d69d36ca3e --- /dev/null +++ b/clang/test/AST/HLSL/this-reference-template.hlsl @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -disable-llvm-passes -o - -hlsl-entry main %s | FileCheck %s + +template +struct Pair { + K First; + V Second; + + K getFirst() { + return this.First; + } + + V getSecond() { + return Second; + } +}; + +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2.0}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); +} + +// CHECK: -CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:8:5 getFirst 'K ()' implicit-inline +// CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-CXXDependentScopeMemberExpr 0x{{[0-9A-Fa-f]+}} '' lvalue .First +// 
CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue this +// CHECK-NEXT:-CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:12:5 getSecond 'V ()' implicit-inline +// CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'V' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue implicit this + +// CHECK: -CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:8:5 used getFirst 'int ()' implicit-inline +// CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int':'int' +// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int':'int' lvalue .First 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue this +// CHECK-NEXT:-CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:12:5 used getSecond 'float ()' implicit-inline +// CHECK-NEXT:-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'float':'float' +// CHECK-NEXT:-MemberExpr 0x{{[0-9A-Fa-f]+}} 'float':'float' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue implicit this diff --git a/clang/test/AST/HLSL/this-reference.hlsl b/clang/test/AST/HLSL/this-reference.hlsl new file mode 100644 index 0000000000000..67d8e7b7b9119 --- /dev/null +++ b/clang/test/AST/HLSL/this-reference.hlsl @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -disable-llvm-passes -o - -hlsl-entry main %s | FileCheck %s + +class Pair { + int First; + int Second; + + int getFirst() { + return this.First; + } + + int getSecond() { + return Second; + } +}; + +class PairInfo : Pair { + int Sum; + + int getSum() { + return this.First + Second; + } +}; + +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); + + PairInfo ValsInfo; 
+ ValsInfo.First = Vals.First; + ValsInfo.Second = Vals.Second; + ValsInfo.Sum = ValsInfo.getSum(); + +} + +// CHECK: -CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:7:7 used getFirst 'int ()' implicit-inline +// CHECK-NEXT:`-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:`-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .First 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue this +// CHECK-NEXT:-CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:11:7 used getSecond 'int ()' implicit-inline +// CHECK-NEXT:`-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:`-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue implicit this + + +// CHECK: CXXMethodDecl 0x{{[0-9A-Fa-f]+}} line:19:7 used getSum 'int ()' implicit-inline +// CHECK-NEXT:`-CompoundStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ReturnStmt 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-BinaryOperator 0x{{[0-9A-Fa-f]+}} 'int' '+' +// CHECK-NEXT:-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:`-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .First 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue +// CHECK-NEXT:`-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'PairInfo' lvalue this +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'int' +// CHECK-NEXT:`-MemberExpr 0x{{[0-9A-Fa-f]+}} 'int' lvalue .Second 0x{{[0-9A-Fa-f]+}} +// CHECK-NEXT:`-ImplicitCastExpr 0x{{[0-9A-Fa-f]+}} 'Pair' lvalue +// CHECK-NEXT:`-CXXThisExpr 0x{{[0-9A-Fa-f]+}} 'PairInfo' lvalue implicit this diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp index a13bce6fb27d4..bba54ee2b2fa1 100644 --- a/clang/test/AST/Interp/arrays.cpp +++ b/clang/test/AST/Interp/arrays.cpp @@ -37,6 +37,60 @@ constexpr int 
getElement(int i) { static_assert(getElement(1) == 4, ""); static_assert(getElement(5) == 36, ""); +constexpr int data[] = {5, 4, 3, 2, 1}; +constexpr int getElement(const int *Arr, int index) { + return *(Arr + index); +} + +static_assert(getElement(data, 1) == 4, ""); +static_assert(getElement(data, 4) == 1, ""); + +constexpr int getElementFromEnd(const int *Arr, int size, int index) { + return *(Arr + size - index - 1); +} +static_assert(getElementFromEnd(data, 5, 0) == 1, ""); +static_assert(getElementFromEnd(data, 5, 4) == 5, ""); + + +constexpr static int arr[2] = {1,2}; +constexpr static int arr2[2] = {3,4}; +constexpr int *p1 = nullptr; +constexpr int *p2 = p1 + 1; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{cannot perform pointer arithmetic on null pointer}} \ + // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{cannot perform pointer arithmetic on null pointer}} +constexpr int *p3 = p1 + 0; +constexpr int *p4 = p1 - 0; +constexpr int *p5 = 0 + p1; +constexpr int *p6 = 0 - p1; // expected-error {{invalid operands to binary expression}} \ + // ref-error {{invalid operands to binary expression}} + +constexpr int const * ap1 = &arr[0]; +constexpr int const * ap2 = ap1 + 3; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{cannot refer to element 3 of array of 2}} \ + // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{cannot refer to element 3 of array of 2}} + +constexpr auto ap3 = arr - 1; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{cannot refer to element -1}} \ + // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{cannot refer to element -1}} +constexpr int k1 = &arr[1] - &arr[0]; +static_assert(k1 == 1, ""); +static_assert((&arr[0] - &arr[1]) == -1, ""); + +constexpr int k2 = &arr2[1] - &arr[0]; // expected-error {{must be initialized by a 
constant expression}} \ + // ref-error {{must be initialized by a constant expression}} + +static_assert((arr + 0) == arr, ""); +static_assert(&arr[0] == arr, ""); +static_assert(*(&arr[0]) == 1, ""); +static_assert(*(&arr[1]) == 2, ""); + +constexpr const int *OOB = (arr + 3) - 3; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{cannot refer to element 3 of array of 2}} \ + // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{cannot refer to element 3 of array of 2}} template constexpr T getElementOf(T* array, int i) { @@ -52,7 +106,6 @@ constexpr T& getElementOfArray(T (&array)[N], int I) { static_assert(getElementOfArray(foo[2], 3) == &m, ""); -constexpr int data[] = {5, 4, 3, 2, 1}; static_assert(data[0] == 4, ""); // expected-error{{failed}} \ // expected-note{{5 == 4}} \ // ref-error{{failed}} \ diff --git a/clang/test/AST/Interp/cxx20.cpp b/clang/test/AST/Interp/cxx20.cpp index 036e7f914bbed..ec273f0713410 100644 --- a/clang/test/AST/Interp/cxx20.cpp +++ b/clang/test/AST/Interp/cxx20.cpp @@ -86,3 +86,27 @@ constexpr int f() { } static_assert(f()); #endif + +/// Distinct literals have distinct addresses. 
+/// see https://github.com/llvm/llvm-project/issues/58754 +constexpr auto foo(const char *p) { return p; } +constexpr auto p1 = "test1"; +constexpr auto p2 = "test2"; + +constexpr bool b1 = foo(p1) == foo(p1); +static_assert(b1); + +constexpr bool b2 = foo(p1) == foo(p2); // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{declared here}} +static_assert(!b2); // ref-error {{not an integral constant expression}} \ + // ref-note {{not a constant expression}} + +constexpr auto name1() { return "name1"; } +constexpr auto name2() { return "name2"; } + +constexpr auto b3 = name1() == name1(); +static_assert(b3); +constexpr auto b4 = name1() == name2(); // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{declared here}} +static_assert(!b4); // ref-error {{not an integral constant expression}} \ + // ref-note {{not a constant expression}} diff --git a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp index 52986faa4e859..97b227222eb09 100644 --- a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp +++ b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp @@ -27,5 +27,7 @@ void h() { struct S { int i; }; auto [x] = S(); - extern void h7(int = x); // FIXME: reject + extern void h7(int = x); + // expected-error@-1 {{default argument references local variable 'x' of enclosing function}} + } diff --git a/clang/test/CXX/drs/dr23xx.cpp b/clang/test/CXX/drs/dr23xx.cpp index 8d6b4a5dc16ea..371ead504bf32 100644 --- a/clang/test/CXX/drs/dr23xx.cpp +++ b/clang/test/CXX/drs/dr23xx.cpp @@ -89,6 +89,16 @@ namespace dr2353 { // dr2353: 9 #pragma clang __debug dump not_use_2 } +#if __cplusplus >= 201402L +namespace dr2358 { // dr2358: 16 + void f2() { + int i = 1; + void g1(int = [xxx=1] { return xxx; }()); // OK + void g2(int = [xxx=i] { return xxx; }()); // expected-error {{default argument references local variable 'i' of enclosing function}} + } 
+} +#endif + #if __cplusplus >= 201707L // Otherwise, if the qualified-id std::tuple_size names a complete class // type **with a member value**, the expression std::tuple_size::value shall diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/p13.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/p13.cpp index b55beb7d4ed78..0635a01466afb 100644 --- a/clang/test/CXX/expr/expr.prim/expr.prim.lambda/p13.cpp +++ b/clang/test/CXX/expr/expr.prim/expr.prim.lambda/p13.cpp @@ -1,4 +1,8 @@ -// RUN: %clang_cc1 -std=c++11 %s -Wunused -Wno-unused-lambda-capture -verify +// RUN: %clang_cc1 -std=c++11 %s -Wunused -Wno-unused-lambda-capture -Wno-c++14-extensions -verify +// RUN: %clang_cc1 -std=c++17 %s -Wunused -Wno-unused-lambda-capture -Wno-c++14-extensions -verify + + +const int global = 0; void f2() { int i = 1; @@ -7,7 +11,20 @@ void f2() { void g3(int = ([=]{ return i; })()); // expected-error{{lambda expression in default argument cannot capture any entity}} void g4(int = ([=]{ return 0; })()); void g5(int = ([]{ return sizeof i; })()); + void g6(int = ([x=1, y = global, &z = global]{ return x; })()); + void g7(int = ([x=i, &y=i]{ return x; })()); // expected-error 2{{default argument references local variable 'i' of enclosing function}} +} + +#if __cplusplus >= 201703L +int global_array[] = { 1, 2 }; +auto [ga, gb] = global_array; + +void structured_bindings() { + int array[] = { 1, 2 }; + auto [a, b] = array; + void func(int c = [x = a, &xref = a, y = ga, &yref = ga] { return x; }()); // expected-error 2{{default argument references local variable 'a' of enclosing function}} } +#endif namespace lambda_in_default_args { int f(int = [] () -> int { int n; return ++n; } ()); diff --git a/clang/test/CodeGen/Inputs/thinlto-opaque.ll b/clang/test/CodeGen/Inputs/thinlto-opaque.ll new file mode 100644 index 0000000000000..bd576ab830143 --- /dev/null +++ b/clang/test/CodeGen/Inputs/thinlto-opaque.ll @@ -0,0 +1,6 @@ +target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--" + +define ptr @f2() { + ret ptr null +} diff --git a/clang/test/CodeGen/PR44896.ll b/clang/test/CodeGen/PR44896.ll index b155bfcb8293d..ae756aa37af9c 100644 --- a/clang/test/CodeGen/PR44896.ll +++ b/clang/test/CodeGen/PR44896.ll @@ -1,6 +1,6 @@ -; RUN: %clang -fdiscard-value-names -S %s -o /dev/null 2>&1 | FileCheck --check-prefix=WARNING %s -; RUN: %clang -S %s -o /dev/null 2>&1 | FileCheck --check-prefix=NOWARNING %s -; RUN: %clang_cc1 -S -emit-llvm %s -discard-value-names -o /dev/null +; RUN: %clang -Xclang -opaque-pointers -fdiscard-value-names -S %s -o /dev/null 2>&1 | FileCheck --check-prefix=WARNING %s +; RUN: %clang -Xclang -opaque-pointers -S %s -o /dev/null 2>&1 | FileCheck --check-prefix=NOWARNING %s +; RUN: %clang_cc1 -opaque-pointers -S -emit-llvm %s -discard-value-names -o /dev/null ; PR 44896 ; WARNING: ignoring -fdiscard-value-names for LLVM Bitcode diff --git a/clang/test/CodeGen/X86/indirect-branch-cs-prefix.c b/clang/test/CodeGen/X86/indirect-branch-cs-prefix.c index 369db26677b4d..67d2a69bc246c 100644 --- a/clang/test/CodeGen/X86/indirect-branch-cs-prefix.c +++ b/clang/test/CodeGen/X86/indirect-branch-cs-prefix.c @@ -1,4 +1,4 @@ -// RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -mindirect-branch-cs-prefix %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -triple i386 -o - -mindirect-branch-cs-prefix %s | FileCheck %s // CHECK: !{i32 4, !"indirect_branch_cs_prefix", i32 1} void foo() {} diff --git a/clang/test/CodeGen/X86/mmx-inline-asm.c b/clang/test/CodeGen/X86/mmx-inline-asm.c index 635e2a6b71efc..19c24a3a91e14 100644 --- a/clang/test/CodeGen/X86/mmx-inline-asm.c +++ b/clang/test/CodeGen/X86/mmx-inline-asm.c @@ -1,5 +1,4 @@ -// RUN: %clang -mmmx -target i386-unknown-unknown -emit-llvm -S %s -o - | FileCheck %s -// +// RUN: %clang_cc1 -emit-llvm -triple i386 -target-feature +mmx %s -o - | FileCheck %s #include // CHECK: { x86_mmx, 
x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx } diff --git a/clang/test/CodeGen/X86/mmx-shift-with-immediate.c b/clang/test/CodeGen/X86/mmx-shift-with-immediate.c index ecd1881c4875c..83be6b5517c01 100644 --- a/clang/test/CodeGen/X86/mmx-shift-with-immediate.c +++ b/clang/test/CodeGen/X86/mmx-shift-with-immediate.c @@ -1,4 +1,4 @@ -// RUN: %clang -mmmx -target i386-unknown-unknown -emit-llvm -S %s -o - | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -triple i386 -target-feature +mmx %s -o - | FileCheck %s #include void shift(__m64 a, __m64 b, int c) { diff --git a/clang/test/CodeGen/X86/x86-cf-protection.c b/clang/test/CodeGen/X86/x86-cf-protection.c index de6906ec51812..359bad714493b 100644 --- a/clang/test/CodeGen/X86/x86-cf-protection.c +++ b/clang/test/CodeGen/X86/x86-cf-protection.c @@ -1,9 +1,10 @@ -// RUN: %clang -target i386-unknown-unknown -x c -E -dM -o - -fcf-protection=return %s | FileCheck %s --check-prefix=RETURN -// RUN: %clang -target i386-unknown-unknown -x c -E -dM -o - -fcf-protection=branch %s | FileCheck %s --check-prefix=BRANCH -// RUN: %clang -target i386-unknown-unknown -x c -E -dM -o - -fcf-protection=full %s | FileCheck %s --check-prefix=FULL -// RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -mibt-seal -flto %s | FileCheck %s --check-prefixes=CFPROT,IBTSEAL -// RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -flto %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL -// RUN: %clang -target i386-unknown-unknown -o - -emit-llvm -S -fcf-protection=branch -mibt-seal %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL +// RUN: %clang_cc1 -E -triple i386 -dM -o - -fcf-protection=return %s | FileCheck %s --check-prefix=RETURN +// RUN: %clang_cc1 -E -triple i386 -dM -o - -fcf-protection=branch %s | FileCheck %s --check-prefix=BRANCH +// RUN: %clang_cc1 -E -triple i386 -dM -o - -fcf-protection=full %s | FileCheck %s --check-prefix=FULL +// RUN: %clang_cc1 -emit-llvm -triple 
i386 -o - -fcf-protection=branch -mibt-seal -flto %s | FileCheck %s --check-prefixes=CFPROT,IBTSEAL +// RUN: %clang_cc1 -emit-llvm -triple i386 -o - -fcf-protection=branch -flto %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL +// RUN: %clang_cc1 -emit-llvm -triple i386 -o - -fcf-protection=branch -mibt-seal %s | FileCheck %s --check-prefixes=CFPROT,NOIBTSEAL +// RUN: not %clang_cc1 -emit-llvm-only -triple i386 -target-cpu pentium-mmx -fcf-protection=branch %s 2>&1 | FileCheck %s --check-prefix=NOCFPROT // RETURN: #define __CET__ 2 // BRANCH: #define __CET__ 1 @@ -11,4 +12,7 @@ // CFPROT: !{i32 8, !"cf-protection-branch", i32 1} // IBTSEAL: !{i32 8, !"ibt-seal", i32 1} // NOIBTSEAL-NOT: "ibt-seal", i32 1 + +// NOCFPROT: error: option 'cf-protection=branch' cannot be specified on this target + void foo() {} diff --git a/clang/test/CodeGen/asan-globals-odr.cpp b/clang/test/CodeGen/asan-globals-odr.cpp index d6b5ed7377fe5..e8fcc81516a57 100644 --- a/clang/test/CodeGen/asan-globals-odr.cpp +++ b/clang/test/CodeGen/asan-globals-odr.cpp @@ -1,12 +1,11 @@ -// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR0,GLOB_VAR,ALIAS0 -// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_ALIAS_INDICATOR,ALIAS1 +// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_ALIAS_INDICATOR,ALIAS1 // RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR0,GLOB_VAR,ALIAS0 // RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -fsanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_ALIAS_INDICATOR,ALIAS1 // RUN: %clang_cc1 
-fsanitize=address -fsanitize-address-use-odr-indicator -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s --check-prefixes=CHECK,INDICATOR0,GLOB_VAR,ALIAS0 // No alias on Windows but indicators should work. -// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefixes=CHECK,GLOB_VAR,ALIAS0 -// RUN: %clang_cc1 -fsanitize=address -fsanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_VAR_INDICATOR,ALIAS0 +// RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefixes=CHECK,GLOB_VAR,ALIAS0 +// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s --check-prefixes=CHECK,INDICATOR1,GLOB_VAR_INDICATOR,ALIAS0 int global; diff --git a/clang/test/CodeGen/asan-static-odr.cpp b/clang/test/CodeGen/asan-static-odr.cpp index a4ba1112db437..bd918243299c1 100644 --- a/clang/test/CodeGen/asan-static-odr.cpp +++ b/clang/test/CodeGen/asan-static-odr.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s +// RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-linux %s | FileCheck %s // No alias on Windows but indicators should work. 
-// RUN: %clang_cc1 -fsanitize=address -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s +// RUN: %clang_cc1 -fsanitize=address -fno-sanitize-address-use-odr-indicator -emit-llvm -o - -triple x86_64-windows-msvc %s | FileCheck %s static int global; diff --git a/clang/test/CodeGen/asm-attrs.c b/clang/test/CodeGen/asm-attrs.c index fa07601cc01ed..6d95e10d0af0b 100644 --- a/clang/test/CodeGen/asm-attrs.c +++ b/clang/test/CodeGen/asm-attrs.c @@ -10,9 +10,9 @@ // CHECK: call void asm sideeffect "foo7", {{.*}} [[NOATTRS]] // CHECK: call i32 asm "foo8", {{.*}} [[READNONE]] -// CHECK: attributes [[READNONE]] = { nounwind readnone } +// CHECK: attributes [[READNONE]] = { nounwind memory(none) } // CHECK: attributes [[NOATTRS]] = { nounwind } -// CHECK: attributes [[READONLY]] = { nounwind readonly } +// CHECK: attributes [[READONLY]] = { nounwind memory(read) } int g0, g1; diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c index e5241a1bbe54e..581f18e10b081 100644 --- a/clang/test/CodeGen/attr-target-mv.c +++ b/clang/test/CodeGen/attr-target-mv.c @@ -15,6 +15,8 @@ int __attribute__((target("arch=sapphirerapids"))) foo(void) {return 10;} int __attribute__((target("arch=alderlake"))) foo(void) {return 11;} int __attribute__((target("arch=rocketlake"))) foo(void) {return 12;} int __attribute__((target("arch=core2"))) foo(void) {return 13;} +int __attribute__((target("arch=raptorlake"))) foo(void) {return 14;} +int __attribute__((target("arch=meteorlake"))) foo(void) {return 15;} int __attribute__((target("default"))) foo(void) { return 2; } int bar(void) { @@ -149,6 +151,10 @@ void calls_pr50025c(void) { pr50025c(); } // LINUX: ret i32 12 // LINUX: define{{.*}} i32 @foo.arch_core2() // LINUX: ret i32 13 +// LINUX: define{{.*}} i32 @foo.arch_raptorlake() +// LINUX: ret i32 14 +// LINUX: define{{.*}} i32 @foo.arch_meteorlake() +// LINUX: ret i32 15 // LINUX: define{{.*}} i32 @foo() // LINUX: ret i32 2 // LINUX: define{{.*}} i32 
@bar() @@ -180,6 +186,10 @@ void calls_pr50025c(void) { pr50025c(); } // WINDOWS: ret i32 12 // WINDOWS: define dso_local i32 @foo.arch_core2() // WINDOWS: ret i32 13 +// WINDOWS: define dso_local i32 @foo.arch_raptorlake() +// WINDOWS: ret i32 14 +// WINDOWS: define dso_local i32 @foo.arch_meteorlake() +// WINDOWS: ret i32 15 // WINDOWS: define dso_local i32 @foo() // WINDOWS: ret i32 2 // WINDOWS: define dso_local i32 @bar() diff --git a/clang/test/CodeGen/builtin-sqrt.c b/clang/test/CodeGen/builtin-sqrt.c index 32300085682bd..2313a68d2d0e2 100644 --- a/clang/test/CodeGen/builtin-sqrt.c +++ b/clang/test/CodeGen/builtin-sqrt.c @@ -8,8 +8,8 @@ float foo(float X) { } // HAS_ERRNO: declare float @sqrtf(float noundef) [[ATTR:#[0-9]+]] -// HAS_ERRNO-NOT: attributes [[ATTR]] = {{{.*}} readnone +// HAS_ERRNO-NOT: attributes [[ATTR]] = {{{.*}} memory(none) // NO_ERRNO: declare float @llvm.sqrt.f32(float) [[ATTR:#[0-9]+]] -// NO_ERRNO: attributes [[ATTR]] = { nocallback nofree nosync nounwind readnone {{.*}}} +// NO_ERRNO: attributes [[ATTR]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/clang/test/CodeGen/complex-builtins.c b/clang/test/CodeGen/complex-builtins.c index 5dc5424ae9316..29d6e7ba909f6 100644 --- a/clang/test/CodeGen/complex-builtins.c +++ b/clang/test/CodeGen/complex-builtins.c @@ -197,9 +197,9 @@ void foo(float f) { // HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl(ptr noundef byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // HAS_ERRNO: attributes [[WILLRETURN_NOT_READNONE]] = { nounwind willreturn {{.*}} } 
diff --git a/clang/test/CodeGen/complex-libcalls.c b/clang/test/CodeGen/complex-libcalls.c index 3a197a2f2005b..7d4e3d04cf64a 100644 --- a/clang/test/CodeGen/complex-libcalls.c +++ b/clang/test/CodeGen/complex-libcalls.c @@ -197,9 +197,9 @@ void foo(float f) { // HAS_ERRNO: declare { x86_fp80, x86_fp80 } @ctanhl(ptr noundef byval({ x86_fp80, x86_fp80 }) align 16) [[NOT_READNONE]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // HAS_ERRNO: attributes [[WILLRETURN_NOT_READNONE]] = { nounwind willreturn {{.*}} } diff --git a/clang/test/CodeGen/function-attributes.c b/clang/test/CodeGen/function-attributes.c index 408a4f16f9b6c..38a0fb03de1e2 100644 --- a/clang/test/CodeGen/function-attributes.c +++ b/clang/test/CodeGen/function-attributes.c @@ -111,9 +111,9 @@ void f20(void) { // CHECK: attributes [[NUW]] = { nounwind optsize{{.*}} } // CHECK: attributes [[AI]] = { alwaysinline nounwind optsize{{.*}} } -// CHECK: attributes [[NUW_OS_RN]] = { nounwind optsize readnone{{.*}} } +// CHECK: attributes [[NUW_OS_RN]] = { nounwind optsize willreturn memory(none){{.*}} } // CHECK: attributes [[SR]] = { nounwind optsize{{.*}} "stackrealign"{{.*}} } // CHECK: attributes [[RT]] = { nounwind optsize returns_twice{{.*}} } // CHECK: attributes [[NR]] = { noreturn optsize } -// CHECK: attributes [[NUW_RN]] = { nounwind optsize readnone willreturn } +// CHECK: attributes [[NUW_RN]] = { nounwind optsize willreturn memory(none) } // CHECK: attributes [[RT_CALL]] = { optsize returns_twice } diff --git a/clang/test/CodeGen/inline-builtin-asm-name.c b/clang/test/CodeGen/inline-builtin-asm-name.c new file mode 100644 index 
0000000000000..969174f7ac0d2 --- /dev/null +++ b/clang/test/CodeGen/inline-builtin-asm-name.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple i686-windows-gnu -emit-llvm -o - %s -disable-llvm-optzns | FileCheck %s + +// CHECK: call i32 @"\01_asm_func_name.inline" + +// CHECK: declare dso_local i32 @"\01_asm_func_name"(ptr noundef, i32 noundef, ptr noundef, ptr noundef) + +// CHECK: define internal i32 @"\01_asm_func_name.inline" + +// CHECK: call i32 @__mingw_vsnprintf + +// CHECK: declare dso_local i32 @__mingw_vsnprintf + +typedef unsigned int size_t; + +int __mingw_vsnprintf(char *_DstBuf, size_t _MaxCount, const char *_Format, __builtin_va_list _ArgList); + +// For the real use case, "_asm_func_name" is actually "___mingw_vsnprintf", but it's renamed in the testcase for disambiguation. +int vsnprintf(char *__stream, size_t __n, const char *__format, __builtin_va_list __local_argv) __asm__("_asm_func_name"); + +extern __inline__ __attribute__((__always_inline__, __gnu_inline__)) +int vsnprintf(char *__stream, size_t __n, const char *__format, __builtin_va_list __local_argv) +{ + return __mingw_vsnprintf(__stream, __n, __format, __local_argv); +} + +void call(const char* fmt, ...) 
{ + char buf[200]; + __builtin_va_list ap; + __builtin_va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + __builtin_va_end(ap); +} diff --git a/clang/test/CodeGen/libcall-declarations.c b/clang/test/CodeGen/libcall-declarations.c index e39263c3d5c4c..ebdb05d7ff109 100644 --- a/clang/test/CodeGen/libcall-declarations.c +++ b/clang/test/CodeGen/libcall-declarations.c @@ -614,8 +614,8 @@ void *use[] = { // CHECK-ERRNO: declare { double, double } @ctanh(double noundef, double noundef) [[NONCONST]] // CHECK-ERRNO: declare <2 x float> @ctanhf(<2 x float> noundef) [[NONCONST]] -// CHECK-NOERRNO: attributes [[NUWRN]] = { nounwind readnone{{.*}} } -// CHECK-NOERRNO: attributes [[NUWRO]] = { nounwind readonly{{.*}} } +// CHECK-NOERRNO: attributes [[NUWRN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK-NOERRNO: attributes [[NUWRO]] = { nounwind willreturn memory(read){{.*}} } -// CHECK-ERRNO: attributes [[NUWRN]] = { nounwind readnone{{.*}} } -// CHECK-ERRNO: attributes [[NUWRO]] = { nounwind readonly{{.*}} } +// CHECK-ERRNO: attributes [[NUWRN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK-ERRNO: attributes [[NUWRO]] = { nounwind willreturn memory(read){{.*}} } diff --git a/clang/test/CodeGen/libcalls.c b/clang/test/CodeGen/libcalls.c index 8313b9a742df8..42b6df4ccbe4a 100644 --- a/clang/test/CodeGen/libcalls.c +++ b/clang/test/CodeGen/libcalls.c @@ -124,5 +124,5 @@ void test_builtins(double d, float f, long double ld) { } // CHECK-YES: attributes [[NUW]] = { nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } -// CHECK-NO-DAG: attributes [[NUW_RN]] = { nounwind readnone{{.*}} } -// CHECK-NO-DAG: attributes [[NUW_RNI]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +// CHECK-NO-DAG: attributes [[NUW_RN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK-NO-DAG: attributes [[NUW_RNI]] = { nocallback nofree nosync nounwind speculatable willreturn 
memory(none) } diff --git a/clang/test/CodeGen/math-builtins.c b/clang/test/CodeGen/math-builtins.c index 965ed77b54be3..559421a4882c5 100644 --- a/clang/test/CodeGen/math-builtins.c +++ b/clang/test/CodeGen/math-builtins.c @@ -680,16 +680,16 @@ __builtin_trunc(f); __builtin_truncf(f); __builtin_truncl(f); __builtin // HAS_ERRNO: declare fp128 @llvm.trunc.f128(fp128) [[READNONE_INTRINSIC]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } -// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } +// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// NO__ERRNO: attributes [[PURE]] = { {{.*}}readonly{{.*}} } +// NO__ERRNO: attributes [[PURE]] = { {{.*}}memory(read){{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } -// HAS_ERRNO: attributes [[PURE]] = { {{.*}}readonly{{.*}} } -// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } +// HAS_ERRNO: attributes [[PURE]] = { {{.*}}memory(read){{.*}} } +// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } -// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } -// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } +// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } diff --git a/clang/test/CodeGen/math-libcalls.c b/clang/test/CodeGen/math-libcalls.c index 52a08d2652817..0e61f92f8c751 100644 --- a/clang/test/CodeGen/math-libcalls.c +++ b/clang/test/CodeGen/math-libcalls.c @@ -704,18 +704,18 @@ void foo(double *d, float f, float *fp, long double *l, int *i, const char 
*c) { // HAS_ERRNO: declare x86_fp80 @llvm.trunc.f80(x86_fp80) [[READNONE_INTRINSIC]] }; -// NO__ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } -// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } +// NO__ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } +// NO__ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } // NO__ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// NO__ERRNO: attributes [[READONLY]] = { {{.*}}readonly{{.*}} } +// NO__ERRNO: attributes [[READONLY]] = { {{.*}}memory(read){{.*}} } // HAS_ERRNO: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } -// HAS_ERRNO: attributes [[READONLY]] = { {{.*}}readonly{{.*}} } -// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } +// HAS_ERRNO: attributes [[READONLY]] = { {{.*}}memory(read){{.*}} } +// HAS_ERRNO: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } // HAS_MAYTRAP: attributes [[NOT_READNONE]] = { nounwind {{.*}} } -// HAS_MAYTRAP: attributes [[READNONE]] = { {{.*}}readnone{{.*}} } +// HAS_MAYTRAP: attributes [[READNONE]] = { {{.*}}memory(none){{.*}} } -// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } -// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}readnone{{.*}} } +// HAS_ERRNO_GNU: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } +// HAS_ERRNO_WIN: attributes [[READNONE_INTRINSIC]] = { {{.*}}memory(none){{.*}} } diff --git a/clang/test/CodeGen/ms-declspecs.c b/clang/test/CodeGen/ms-declspecs.c index ff9a143f241ec..e390dddbe2b47 100644 --- a/clang/test/CodeGen/ms-declspecs.c +++ b/clang/test/CodeGen/ms-declspecs.c @@ -41,4 +41,4 @@ void noalias_caller(int *x) { noalias_callee(x); } // CHECK: attributes [[NUW]] = { nounwind{{.*}} } // CHECK: attributes [[NI]] = { noinline nounwind{{.*}} } // CHECK: 
attributes [[NR]] = { noreturn } -// CHECK: attributes [[NA]] = { argmemonly nounwind{{.*}} } +// CHECK: attributes [[NA]] = { nounwind memory(argmem: readwrite){{.*}} } diff --git a/clang/test/CodeGen/partial-order-variadic.cpp b/clang/test/CodeGen/partial-order-variadic.cpp deleted file mode 100644 index a10cd6812f988..0000000000000 --- a/clang/test/CodeGen/partial-order-variadic.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fclang-abi-compat=15 -DCLANG_ABI_COMPAT=15 %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-unknown %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,AFTER-15 - -// CHECK: %struct.S = type { i8 } -// CHECK: @_Z2ggiRi -// CHECK: @_Z1gIiJEERiPT_DpT0_ -template int &g(T *, U...); -template void g(T); -template struct S; -template struct S {}; -void gg(int i, int &r) { - r = g(&i); - S a; -} - -// CHECK: @_Z1hIJiEEvDpPT_ -template void h(T*...) {} -template void h(const T&) {} -template void h(int*); - -#if !defined(CLANG_ABI_COMPAT) - -// AFTER-15: @_Z1fIiJEEvPT_DpT0_ -template void f(T*, U...){} -template void f(T){} -template void f(int*); - -template struct A; -template struct A {}; -template struct A; -template struct A; - -#endif diff --git a/clang/test/CodeGen/pragma-weak.c b/clang/test/CodeGen/pragma-weak.c index 306ce306f4640..52328bf9ff1be 100644 --- a/clang/test/CodeGen/pragma-weak.c +++ b/clang/test/CodeGen/pragma-weak.c @@ -202,4 +202,4 @@ void zzz(void){} int correct_linkage; // CHECK: attributes [[NI]] = { noinline nounwind{{.*}} } -// CHECK: attributes [[RN]] = { noinline nounwind optnone readnone{{.*}} } +// CHECK: attributes [[RN]] = { noinline nounwind optnone willreturn memory(none){{.*}} } diff --git a/clang/test/CodeGen/riscv-vector-bits-vscale-range.c b/clang/test/CodeGen/riscv-vector-bits-vscale-range.c index 9fbb9795657b3..ed391f5d04e56 100644 --- a/clang/test/CodeGen/riscv-vector-bits-vscale-range.c +++ 
b/clang/test/CodeGen/riscv-vector-bits-vscale-range.c @@ -14,6 +14,7 @@ // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64x -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ZVE64 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64f -target-feature +f -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ZVE64 // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d -target-feature +f -target-feature +d -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ZVE64 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve32x -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ZVE32 // CHECK-LABEL: @func() #0 // CHECK: attributes #0 = { {{.*}} vscale_range([[#VBITS]],[[#VBITS]]) {{.*}} } @@ -22,4 +23,6 @@ // CHECK-V: attributes #0 = { {{.*}} vscale_range(2,1024) {{.*}} } // CHECK-ZVL: attributes #0 = { {{.*}} vscale_range(8,1024) {{.*}} } // CHECK-ZVE64: attributes #0 = { {{.*}} vscale_range(1,1024) {{.*}} } +// CHECK-ZVE32: attributes #0 +// CHECK-ZVE32-NOT: vscale_range void func(void) {} diff --git a/clang/test/CodeGen/struct-passing.c b/clang/test/CodeGen/struct-passing.c index ad7b813320849..c8cfeb9c8168a 100644 --- a/clang/test/CodeGen/struct-passing.c +++ b/clang/test/CodeGen/struct-passing.c @@ -23,5 +23,5 @@ void *ps[] = { f0, f1, f2, f3, f4, f5 }; // CHECK: declare void @f4({{.*}} byval({{.*}}) align 4) // CHECK: declare void @f5({{.*}} byval({{.*}}) align 4) -// CHECK: attributes [[RN]] = { nounwind readnone{{.*}} } -// CHECK: attributes [[RO]] = { nounwind readonly{{.*}} } +// CHECK: attributes [[RN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK: attributes [[RO]] = { nounwind willreturn memory(read){{.*}} } diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index b1159aeacf90b..fc3d9ef8e572e 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ 
-115,7 +115,9 @@ void verifycpustrings(void) { (void)__builtin_cpu_is("ivybridge"); (void)__builtin_cpu_is("knl"); (void)__builtin_cpu_is("knm"); + (void)__builtin_cpu_is("meteorlake"); (void)__builtin_cpu_is("nehalem"); + (void)__builtin_cpu_is("raptorlake"); (void)__builtin_cpu_is("rocketlake"); (void)__builtin_cpu_is("sandybridge"); (void)__builtin_cpu_is("shanghai"); diff --git a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll index 959d89d61ab27..2309ed717c2a2 100644 --- a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -100,7 +100,7 @@ cont2: ; CHECK-IR: br i1 {{.*}}, label %trap ; We still have to call it as virtual. - ; CHECK-IR: %call3 = tail call i32 %7 + ; CHECK-IR: %call3 = tail call i32 {{%[0-9]+}} %call3 = tail call i32 %8(%struct.A* nonnull %obj, i32 %call) ret i32 %call3 } diff --git a/clang/test/CodeGen/thinlto-opaque-typed-mix.ll b/clang/test/CodeGen/thinlto-opaque-typed-mix.ll new file mode 100644 index 0000000000000..1cd301f290e9b --- /dev/null +++ b/clang/test/CodeGen/thinlto-opaque-typed-mix.ll @@ -0,0 +1,23 @@ +; REQUIRES: x86-registered-target +; Test that mixing bitcode file with opaque and typed pointers works. 
+ +; RUN: mkdir -p %t +; RUN: opt -module-summary -o %t/typed.bc %s +; RUN: opt -module-summary -o %t/opaque.bc %S/Inputs/thinlto-opaque.ll +; RUN: llvm-lto2 run -thinlto-distributed-indexes %t/typed.bc %t/opaque.bc \ +; RUN: -o %t/native.o -r %t/typed.bc,main,plx -r %t/typed.bc,f2, \ +; RUN: -r %t/opaque.bc,f2,p + +; RUN: %clang_cc1 -triple x86_64-- -emit-obj -o %t/native.o %t/typed.bc \ +; RUN: -Wno-override-module \ +; RUN: -fthinlto-index=%t/typed.bc.thinlto.bc + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64--" + +declare i8* @f2() + +define i32 @main() { + call i8* @f2() + ret i32 0 +} diff --git a/clang/test/CodeGen/thinlto_backend.ll b/clang/test/CodeGen/thinlto_backend.ll index dea1a8ac54cd3..37ab6206a9ce5 100644 --- a/clang/test/CodeGen/thinlto_backend.ll +++ b/clang/test/CodeGen/thinlto_backend.ll @@ -1,52 +1,52 @@ ; REQUIRES: x86-registered-target -; RUN: opt -module-summary -o %t1.o %s -; RUN: opt -module-summary -o %t2.o %S/Inputs/thinlto_backend.ll -; RUN: llvm-lto -thinlto -o %t %t1.o %t2.o +; RUN: opt -opaque-pointers -module-summary -o %t1.o %s +; RUN: opt -opaque-pointers -module-summary -o %t2.o %S/Inputs/thinlto_backend.ll +; RUN: llvm-lto -opaque-pointers -thinlto -o %t %t1.o %t2.o ; Ensure clang -cc1 give expected error for incorrect input type -; RUN: not %clang_cc1 -O2 -o %t1.o -x c %s -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-WARNING +; RUN: not %clang_cc1 -opaque-pointers -O2 -o %t1.o -x c %s -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-WARNING ; CHECK-WARNING: error: invalid argument '-fthinlto-index={{.*}}' only allowed with '-x ir' ; Ensure we get expected error for missing index file -; RUN: %clang -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=bad.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR1 +; RUN: %clang -Xclang -opaque-pointers -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=bad.thinlto.bc 2>&1 | 
FileCheck %s -check-prefix=CHECK-ERROR1 ; CHECK-ERROR1: Error loading index file 'bad.thinlto.bc' ; Ensure we ignore empty index file, and run non-ThinLTO compilation which ; would not import f2 ; RUN: touch %t4.thinlto.bc -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=%t4.thinlto.bc +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t1.o -c -fthinlto-index=%t4.thinlto.bc ; RUN: llvm-nm %t4.o | FileCheck --check-prefix=CHECK-OBJ-IGNORE-EMPTY %s ; CHECK-OBJ-IGNORE-EMPTY: T f1 ; CHECK-OBJ-IGNORE-EMPTY: U f2 ; Ensure we don't fail with index and non-ThinLTO object file, and output must ; be empty file. -; RUN: opt -o %t5.o %s -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t5.o -c -fthinlto-index=%t.thinlto.bc +; RUN: opt -opaque-pointers -o %t5.o %s +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t5.o -c -fthinlto-index=%t.thinlto.bc ; RUN: llvm-nm %t4.o 2>&1 | count 0 ; Ensure f2 was imported. Check for all 3 flavors of -save-temps[=cwd|obj]. -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj -; RUN: llvm-dis %t1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps=obj +; RUN: llvm-dis -opaque-pointers %t1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s ; RUN: mkdir -p %T/dir1 ; RUN: cd %T/dir1 -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps=cwd +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps=cwd ; RUN: cd ../.. 
-; RUN: llvm-dis %T/dir1/*1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s +; RUN: llvm-dis -opaque-pointers %T/dir1/*1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s ; RUN: mkdir -p %T/dir2 ; RUN: cd %T/dir2 -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc -save-temps ; RUN: cd ../.. -; RUN: llvm-dis %T/dir2/*1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s +; RUN: llvm-dis -opaque-pointers %T/dir2/*1.s.3.import.bc -o - | FileCheck --check-prefix=CHECK-IMPORT %s ; CHECK-IMPORT: define available_externally void @f2() ; RUN: llvm-nm %t3.o | FileCheck --check-prefix=CHECK-OBJ %s ; CHECK-OBJ: T f1 ; CHECK-OBJ-NOT: U f2 ; Ensure we get expected error for input files without summaries -; RUN: opt -o %t2.o %s -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR2 +; RUN: opt -opaque-pointers -o %t2.o %s +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR2 ; CHECK-ERROR2: Error loading imported file {{.*}}: Could not find module summary target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/clang/test/CodeGen/thinlto_backend_local_name_conflict.ll b/clang/test/CodeGen/thinlto_backend_local_name_conflict.ll index 8a8abd53c4e18..c74271004ade9 100644 --- a/clang/test/CodeGen/thinlto_backend_local_name_conflict.ll +++ b/clang/test/CodeGen/thinlto_backend_local_name_conflict.ll @@ -14,8 +14,8 @@ ; This module will import a() and b() which should cause the read only copy ; of baz from each of those modules to be imported. 
Check that the both are ; imported as local copies. -; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t.bc -c -fthinlto-index=%t.bc.thinlto.bc -save-temps=obj -; RUN: llvm-dis %t.s.3.import.bc -o - | FileCheck --check-prefix=IMPORT %s +; RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux-gnu -O2 -o %t4.o -x ir %t.bc -c -fthinlto-index=%t.bc.thinlto.bc -save-temps=obj +; RUN: llvm-dis -opaque-pointers %t.s.3.import.bc -o - | FileCheck --check-prefix=IMPORT %s ; IMPORT: @baz.llvm.{{.*}} = internal global i32 10 ; IMPORT: @baz.llvm.{{.*}} = internal global i32 10 diff --git a/clang/test/CodeGenCUDA/lambda-noinline.cu b/clang/test/CodeGenCUDA/lambda-noinline.cu new file mode 100644 index 0000000000000..de2196e63f074 --- /dev/null +++ b/clang/test/CodeGenCUDA/lambda-noinline.cu @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple x86_64-linux-gnu \ +// RUN: | FileCheck -check-prefix=HOST %s +// RUN: %clang_cc1 -no-opaque-pointers -x hip -emit-llvm -std=c++11 %s -o - \ +// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: | FileCheck -check-prefix=DEV %s + +#include "Inputs/cuda.h" + +// Checks noinline is correctly added to the lambda function. 
+ +// HOST: define{{.*}}@_ZZ4HostvENKUlvE_clEv({{.*}}) #[[ATTR:[0-9]+]] +// HOST: attributes #[[ATTR]]{{.*}}noinline + +// DEV: define{{.*}}@_ZZ6DevicevENKUlvE_clEv({{.*}}) #[[ATTR:[0-9]+]] +// DEV: attributes #[[ATTR]]{{.*}}noinline + +__device__ int a; +int b; + +__device__ int Device() { return ([&] __device__ __noinline__ (){ return a; })(); } + +__host__ int Host() { return ([&] __host__ __noinline__ (){ return b; })(); } diff --git a/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp b/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp index 99d172239daa3..cf95b54fb8c07 100644 --- a/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp +++ b/clang/test/CodeGenCXX/2009-05-04-PureConstNounwind.cpp @@ -15,8 +15,8 @@ int f(void) { // CHECK: declare noundef i32 @_Z1tv() [[TF2:#[0-9]+]] // CHECK: attributes [[TF]] = { {{.*}} } -// CHECK: attributes [[NUW_RN]] = { nounwind readnone willreturn{{.*}} } -// CHECK: attributes [[NUW_RO]] = { nounwind readonly willreturn{{.*}} } +// CHECK: attributes [[NUW_RN]] = { nounwind willreturn memory(none){{.*}} } +// CHECK: attributes [[NUW_RO]] = { nounwind willreturn memory(read){{.*}} } // CHECK: attributes [[TF2]] = { {{.*}} } -// CHECK: attributes [[NUW_RN_CALL]] = { nounwind readnone willreturn } -// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly willreturn } +// CHECK: attributes [[NUW_RN_CALL]] = { nounwind willreturn memory(none) } +// CHECK: attributes [[NUW_RO_CALL]] = { nounwind willreturn memory(read) } diff --git a/clang/test/CodeGenCXX/cxx20-module-std-subst-1.cppm b/clang/test/CodeGenCXX/cxx20-module-std-subst-1.cppm index 7d4992a2adce8..99fb2327b2d56 100644 --- a/clang/test/CodeGenCXX/cxx20-module-std-subst-1.cppm +++ b/clang/test/CodeGenCXX/cxx20-module-std-subst-1.cppm @@ -6,7 +6,9 @@ class Pooh; class Piglet; # 8 "" 2 +# 8 "" 1 3 export module std; // might happen, you can't say it won't! 
+# 9 "" 2 3 namespace std { export template class allocator { diff --git a/clang/test/CodeGenCXX/debug-info-enum-metadata-collision.cpp b/clang/test/CodeGenCXX/debug-info-enum-metadata-collision.cpp new file mode 100644 index 0000000000000..dd27acd0a77c5 --- /dev/null +++ b/clang/test/CodeGenCXX/debug-info-enum-metadata-collision.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple %itanium_abi_triple -emit-llvm -debug-info-kind=constructor %s -o - | FileCheck %s + +// Test that clang doesn't crash while resolving temporary debug metadata of +// a record with collisions in the record's enum users. + +// CHECK: !DICompositeType(tag: DW_TAG_enumeration_type, +// CHECK-SAME: scope: [[SCOPE:![0-9]+]] +// CHECK-SAME: elements: [[ELEMENTS:![0-9]+]] +// CHECK: [[SCOPE]] = !DICompositeType(tag: DW_TAG_structure_type +// CHECK-SAME: name: "Struct1" +// CHECK: [[ELEMENTS]] = !{[[ELEMENT:![0-9]+]]} +// CHECK: [[ELEMENT]] = !DIEnumerator(name: "enumValue1" + +template struct Struct1 { + enum { enumValue1 }; + Struct1(); +}; +void function2() { + struct Struct3 {}; + int i = Struct1::enumValue1; +} +void function3() { + struct Struct3 {}; + int i = Struct1::enumValue1; +} diff --git a/clang/test/CodeGenCXX/dynamic-cast.cpp b/clang/test/CodeGenCXX/dynamic-cast.cpp index 86e0f62bc9f98..1d36376a55bc7 100644 --- a/clang/test/CodeGenCXX/dynamic-cast.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast.cpp @@ -20,5 +20,5 @@ const B& f(A *a) { // CHECK: declare ptr @__dynamic_cast(ptr, ptr, ptr, i64) [[NUW_RO:#[0-9]+]] -// CHECK: attributes [[NUW_RO]] = { nounwind readonly } +// CHECK: attributes [[NUW_RO]] = { nounwind memory(read) } // CHECK: attributes [[NR]] = { noreturn } diff --git a/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp b/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp index 766591f510937..ea78a11f451ca 100644 --- a/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp +++ b/clang/test/CodeGenCXX/thinlto-distributed-type-metadata.cpp @@ -4,11 +4,11 
@@ // Ensure that a distributed backend invocation of ThinLTO lowers the type test // as expected. -// RUN: %clang_cc1 -flto=thin -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm-bc -o %t.o %s -// RUN: llvm-dis %t.o -o - | FileCheck --check-prefix=TT %s -// RUN: llvm-lto -thinlto -o %t2 %t.o -// RUN: %clang -target x86_64-unknown-linux -O2 -o %t3.o -x ir %t.o -c -fthinlto-index=%t2.thinlto.bc -save-temps=obj -// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// RUN: %clang_cc1 -opaque-pointers -flto=thin -flto-unit -triple x86_64-unknown-linux -fwhole-program-vtables -emit-llvm-bc -o %t.o %s +// RUN: llvm-dis -opaque-pointers %t.o -o - | FileCheck --check-prefix=TT %s +// RUN: llvm-lto -opaque-pointers -thinlto -o %t2 %t.o +// RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux -O2 -o %t3.o -x ir %t.o -c -fthinlto-index=%t2.thinlto.bc -save-temps=obj +// RUN: llvm-dis -opaque-pointers %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s // llvm-nm %t3.o | FileCheck --check-prefix=NM %s // The pre-link bitcode produced by clang should contain a type test assume @@ -34,12 +34,12 @@ // compilation pipeline is invoked. If not lowered then LLVM CodeGen may assert. 
// RUN: touch %t4.thinlto.bc // O2 new PM -// RUN: %clang -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj -// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux -O2 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj +// RUN: llvm-dis -opaque-pointers %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s // llvm-nm %t4.o | FileCheck --check-prefix=NM %s // O0 new PM -// RUN: %clang -target x86_64-unknown-linux -O0 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj -// RUN: llvm-dis %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s +// RUN: %clang -Xclang -opaque-pointers -target x86_64-unknown-linux -O0 -o %t4.o -x ir %t.o -c -fthinlto-index=%t4.thinlto.bc -save-temps=obj +// RUN: llvm-dis -opaque-pointers %t.s.4.opt.bc -o - | FileCheck --check-prefix=OPT %s // llvm-nm %t4.o | FileCheck --check-prefix=NM %s struct A { diff --git a/clang/test/CodeGenCXX/threadlocal_address.cpp b/clang/test/CodeGenCXX/threadlocal_address.cpp index cb63bc2759906..0ae58ab550029 100644 --- a/clang/test/CodeGenCXX/threadlocal_address.cpp +++ b/clang/test/CodeGenCXX/threadlocal_address.cpp @@ -51,4 +51,4 @@ int f() { // CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[J_ADDR]] // CHECK-O1-NEXT: ret i32 %[[INC]] // -// CHECK: attributes #[[ATTR_NUM]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +// CHECK: attributes #[[ATTR_NUM]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/clang/test/CodeGenCXX/wasm-eh.cpp b/clang/test/CodeGenCXX/wasm-eh.cpp index e965768bf834f..27752f5f58036 100644 --- a/clang/test/CodeGenCXX/wasm-eh.cpp +++ b/clang/test/CodeGenCXX/wasm-eh.cpp @@ -34,7 +34,7 @@ void test0() { // CHECK-NEXT: %[[EXN:.*]] = call ptr @llvm.wasm.get.exception(token %[[CATCHPAD]]) // CHECK-NEXT: store ptr %[[EXN]], ptr %exn.slot // CHECK-NEXT: 
%[[SELECTOR:.*]] = call i32 @llvm.wasm.get.ehselector(token %[[CATCHPAD]]) -// CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #2 +// CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm.eh.typeid.for(ptr @_ZTIi) #8 // CHECK-NEXT: %[[MATCHES:.*]] = icmp eq i32 %[[SELECTOR]], %[[TYPEID]] // CHECK-NEXT: br i1 %[[MATCHES]], label %[[CATCH_INT_BB:.*]], label %[[CATCH_FALLTHROUGH_BB:.*]] @@ -51,7 +51,7 @@ void test0() { // CHECK-NEXT: br label %[[TRY_CONT_BB:.*]] // CHECK: [[CATCH_FALLTHROUGH_BB]] -// CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm.eh.typeid.for(ptr @_ZTId) #2 +// CHECK-NEXT: %[[TYPEID:.*]] = call i32 @llvm.eh.typeid.for(ptr @_ZTId) #8 // CHECK-NEXT: %[[MATCHES:.*]] = icmp eq i32 %[[SELECTOR]], %[[TYPEID]] // CHECK-NEXT: br i1 %[[MATCHES]], label %[[CATCH_FLOAT_BB:.*]], label %[[RETHROW_BB:.*]] diff --git a/clang/test/CodeGenCoroutines/pr56329.cpp b/clang/test/CodeGenCoroutines/pr56329.cpp index 2e9a1a244e218..855755d05f844 100644 --- a/clang/test/CodeGenCoroutines/pr56329.cpp +++ b/clang/test/CodeGenCoroutines/pr56329.cpp @@ -116,4 +116,4 @@ Task Outer() { // CHECK: musttail call // CHECK: musttail call // CHECK-NEXT: ret void -// CHEKC-NEXT: } +// CHECK-NEXT: } diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl new file mode 100644 index 0000000000000..92504dfbd6261 --- /dev/null +++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes -o - -std=hlsl202x %s | FileCheck %s + +struct Pair { + int First; + int Second; + int getFirst() { + Pair Another = {5, 10}; + this = Another; + return this.First; + } + int getSecond() { + this = Pair(); + return Second; + } + void operator=(Pair P) { + First = P.First; + Second = 2; + } +}; +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); +} + +// This 
test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators. +// CHECK: define linkonce_odr noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #2 align 2 { +// CHECK-NEXT:entry: +// CHECK-NEXT:%this.addr = alloca ptr, align 4 +// CHECK-NEXT:%Another = alloca %struct.Pair, align 4 +// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4 +// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT:%First = getelementptr inbounds %struct.Pair, ptr %Another, i32 0, i32 0 +// CHECK-NEXT:store i32 5, ptr %First, align 4 +// CHECK-NEXT:%Second = getelementptr inbounds %struct.Pair, ptr %Another, i32 0, i32 1 +// CHECK-NEXT:store i32 10, ptr %Second, align 4 +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.tmp, ptr align 4 %Another, i32 8, i1 false) +// CHECK-NEXT:call void @"??4Pair@@QAAXU0@@Z"(ptr noundef nonnull align 4 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 4 %agg.tmp) +// CHECK-NEXT:%First2 = getelementptr inbounds %struct.Pair, ptr %this1, i32 0, i32 0 +// CHECK-NEXT:%0 = load i32, ptr %First2, align 4 +// CHECK-NEXT:ret i32 %0 + +// CHECK: define linkonce_odr noundef i32 @"?getSecond@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #2 align 2 { +// CHECK-NEXT:entry: +// CHECK-NEXT:%this.addr = alloca ptr, align 4 +// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4 +// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT:call void @llvm.memset.p0.i32(ptr align 4 %agg.tmp, i8 0, i32 8, i1 false) +// CHECK-NEXT:call void @"??4Pair@@QAAXU0@@Z"(ptr noundef nonnull align 4 dereferenceable(8) %this1, ptr noundef byval(%struct.Pair) align 4 %agg.tmp) +// CHECK-NEXT:%Second = getelementptr inbounds %struct.Pair, ptr %this1, i32 0, i32 1 +// CHECK-NEXT:%0 = load i32, ptr %Second, align 4 
+// CHECK-NEXT:ret i32 %0 diff --git a/clang/test/CodeGenHLSL/this-assignment.hlsl b/clang/test/CodeGenHLSL/this-assignment.hlsl new file mode 100644 index 0000000000000..bb67fb6e103c5 --- /dev/null +++ b/clang/test/CodeGenHLSL/this-assignment.hlsl @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - -hlsl-entry main %s | FileCheck %s + +struct Pair { + int First; + int Second; + + int getFirst() { + Pair Another = {5, 10}; + this = Another; + return this.First; + } + + int getSecond() { + this = Pair(); + return Second; + } +}; + +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2.0}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); +} + +// This tests reference like implicit this in HLSL +// CHECK: define linkonce_odr noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #3 align 2 { +// CHECK-NEXT:entry: +// CHECK-NEXT:%this.addr = alloca ptr, align 4 +// CHECK-NEXT:%Another = alloca %struct.Pair, align 4 +// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 4 %Another, ptr align 4 @"__const.?getFirst@Pair@@QAAHXZ.Another", i32 8, i1 false) +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 4 %this1, ptr align 4 %Another, i32 8, i1 false) +// CHECK-NEXT:%First = getelementptr inbounds %struct.Pair, ptr %this1, i32 0, i32 0 + +// CHECK: define linkonce_odr noundef i32 @"?getSecond@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #3 align 2 { +// CHECK-NEXT:entry: +// CHECK-NEXT:%this.addr = alloca ptr, align 4 +// CHECK-NEXT:%ref.tmp = alloca %struct.Pair, align 4 +// CHECK-NEXT:store ptr %this, ptr %this.addr, align 4 +// CHECK-NEXT:%this1 = load ptr, ptr %this.addr, align 4 +// CHECK-NEXT:call void @llvm.memset.p0.i32(ptr align 4 %ref.tmp, i8 0, i32 8, i1 false) +// CHECK-NEXT:call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 %this1, ptr align 4 %ref.tmp, i32 8, i1 false) +// CHECK-NEXT:%Second = getelementptr inbounds %struct.Pair, ptr %this1, i32 0, i32 1 diff --git a/clang/test/CodeGenHLSL/this-reference.hlsl b/clang/test/CodeGenHLSL/this-reference.hlsl new file mode 100644 index 0000000000000..22bab1d90c70a --- /dev/null +++ b/clang/test/CodeGenHLSL/this-reference.hlsl @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - -hlsl-entry main %s | FileCheck %s + +struct Pair { + int First; + float Second; + + int getFirst() { + return this.First; + } + + float getSecond() { + return Second; + } +}; + +[numthreads(1, 1, 1)] +void main() { + Pair Vals = {1, 2.0}; + Vals.First = Vals.getFirst(); + Vals.Second = Vals.getSecond(); +} + +// This tests reference like `this` in HLSL + // CHECK: %call = call noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %Vals) + // CHECK-NEXT: %First = getelementptr inbounds %struct.Pair, ptr %Vals, i32 0, i32 0 + // CHECK-NEXT: store i32 %call, ptr %First, align 4 + // CHECK-NEXT: %call1 = call noundef float @"?getSecond@Pair@@QAAMXZ"(ptr noundef nonnull align 4 dereferenceable(8) %Vals) + // CHECK-NEXT: %Second = getelementptr inbounds %struct.Pair, ptr %Vals, i32 0, i32 1 diff --git a/clang/test/CodeGenObjC/class-stubs.m b/clang/test/CodeGenObjC/class-stubs.m index 5cd3d575596cb..d73b541dd97ba 100644 --- a/clang/test/CodeGenObjC/class-stubs.m +++ b/clang/test/CodeGenObjC/class-stubs.m @@ -81,4 +81,4 @@ - (void) anotherInstanceMethod { @end // -- calls to objc_loadClassRef() are readnone -// CHECK: attributes [[ATTRLIST]] = { nounwind nonlazybind readnone } +// CHECK: attributes [[ATTRLIST]] = { nounwind nonlazybind memory(none) } diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 9696f3536e2f6..600194e5e6c13 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl 
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -796,7 +796,7 @@ kernel void test_s_setreg(uint val) { // CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024} // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025} -// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nofree nounwind readonly } +// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nofree nounwind memory(read) } // CHECK-DAG: attributes #[[$READ_EXEC_ATTRS]] = { convergent } // CHECK-DAG: ![[$EXEC]] = !{!"exec"} // CHECK-DAG: ![[$EXEC_LO]] = !{!"exec_lo"} diff --git a/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl b/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl index f0ebf6e3c0eac..ac3bff9dbde27 100644 --- a/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl +++ b/clang/test/CodeGenOpenCL/fdeclare-opencl-builtins.cl @@ -49,6 +49,6 @@ void test_generic_optionality(float a, float *b) { } // CHECK: attributes [[ATTR_CONST]] = -// CHECK-SAME: readnone +// CHECK-SAME: memory(none) // CHECK: attributes [[ATTR_PURE]] = -// CHECK-SAME: readonly +// CHECK-SAME: memory(read) diff --git a/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp b/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp index de982c25b7e10..0a624f71be8a5 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp @@ -97,7 +97,7 @@ void foo(float *A, int *B, State *C, State &D) { z = __builtin_intel_fpga_mem(&D, PARAM_1 | PARAM_2, 128, 4, TestVal1, TestVal2); } -// CHECK-DAG: attributes [[ATT]] = { readnone } +// CHECK-DAG: attributes [[ATT]] = { memory(none) } template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp b/clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp index 958bd1389ff52..5a3b3d7be9808 100644 --- a/clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp +++ 
b/clang/test/CodeGenSYCL/no-opaque-ptr-intel-fpga-mem-builtin.cpp @@ -97,7 +97,7 @@ void foo(float *A, int *B, State *C, State &D) { z = __builtin_intel_fpga_mem(&D, PARAM_1 | PARAM_2, 128, 4, TestVal1, TestVal2); } -// CHECK-DAG: attributes [[ATT]] = { readnone } +// CHECK-DAG: attributes [[ATT]] = { memory(none) } template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp b/clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp index 62352886b249e..095c875e32e0a 100644 --- a/clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp +++ b/clang/test/CodeGenSYCL/no_opaque_intel-fpga-mem-builtin.cpp @@ -67,7 +67,7 @@ void foo(float *A, int *B, State *C, State &D) { f = __builtin_intel_fpga_mem(&F, PARAM_1 | PARAM_2, 127); } -// CHECK-DAG: attributes [[ATT]] = { readnone } +// CHECK-DAG: attributes [[ATT]] = { memory(none) } template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { diff --git a/clang/test/Driver/Inputs/inc-inexistent.rsp b/clang/test/Driver/Inputs/inc-inexistent.rsp new file mode 100644 index 0000000000000..c9ecfdf88ddd0 --- /dev/null +++ b/clang/test/Driver/Inputs/inc-inexistent.rsp @@ -0,0 +1 @@ +@inexistent.txt diff --git a/clang/test/Driver/aarch64-mcpu.c b/clang/test/Driver/aarch64-mcpu.c index 0433f6a5b3d3f..b40c579acdf00 100644 --- a/clang/test/Driver/aarch64-mcpu.c +++ b/clang/test/Driver/aarch64-mcpu.c @@ -45,6 +45,8 @@ // CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78" // RUN: %clang -target aarch64 -mcpu=cortex-a78c -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A78C %s // CORTEX-A78C: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78c" +// RUN: %clang -target aarch64 -mcpu=cortex-a715 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A715 %s +// CORTEX-A715: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a715" // RUN: %clang -target aarch64 
-mcpu=neoverse-e1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-E1 %s // NEOVERSE-E1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-e1" // RUN: %clang -target aarch64 -mcpu=neoverse-v1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-V1 %s diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 1756a5dbfe798..d1254a2e47029 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -263,23 +263,18 @@ // RUN: FileCheck %s --check-prefix=CHECK-NO-CHECK-ASAN-CALLBACK // CHECK-NO-CHECK-ASAN-CALLBACK-NOT: "-mllvm" "-asan-instrumentation-with-call-threshold=0" -// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fsanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR -// RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fsanitize-address-use-odr-indicator -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR -// CHECK-ASAN-ODR-INDICATOR: -cc1{{.*}}-fsanitize-address-use-odr-indicator +// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR +// RUN: %clang_cl --target=x86_64-windows -fsanitize=address -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-OFF +// CHECK-ASAN-ODR-INDICATOR-NOT: "-fsanitize-address-use-odr-indicator" // RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fno-sanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-OFF // RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fno-sanitize-address-use-odr-indicator -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-OFF -// CHECK-ASAN-ODR-INDICATOR-OFF-NOT: -cc1{{.*}}address-generate-odr-globals +// CHECK-ASAN-ODR-INDICATOR-OFF: "-cc1" {{.*}} "-fno-sanitize-address-use-odr-indicator" -// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fno-sanitize-address-use-odr-indicator 
-fsanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-BOTH -// RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fno-sanitize-address-use-odr-indicator -fsanitize-address-use-odr-indicator -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-BOTH -// CHECK-ASAN-ODR-INDICATOR-BOTH: -cc1{{.*}}-fsanitize-address-use-odr-indicator +// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fno-sanitize-address-use-odr-indicator -fsanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR +// RUN: %clang_cl --target=x86_64-windows -fsanitize=address -fno-sanitize-address-use-odr-indicator -fsanitize-address-use-odr-indicator -### -- %s 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR -// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fsanitize-address-use-odr-indicator -fno-sanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-BOTH-OFF -// CHECK-ASAN-ODR-INDICATOR-BOTH-OFF-NOT: -cc1{{.*}}address-generate-odr-globals - -// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-WITHOUT-ODR-INDICATOR -// CHECK-ASAN-WITHOUT-ODR-INDICATOR-NOT: -cc1{{.*}}address-generate-odr-globals +// RUN: %clang --target=x86_64-linux-gnu -fsanitize=address -fsanitize-address-use-odr-indicator -fno-sanitize-address-use-odr-indicator %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ASAN-ODR-INDICATOR-OFF // RUN: %clang --target=x86_64-linux-gnu -fsanitize-memory-track-origins -pie %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-ONLY-TRACK-ORIGINS // CHECK-ONLY-TRACK-ORIGINS: warning: argument unused during compilation: '-fsanitize-memory-track-origins' diff --git a/clang/test/Driver/global-isel.c b/clang/test/Driver/global-isel.c index 66f196b03c1ea..0d3bd2b2fe262 100644 --- a/clang/test/Driver/global-isel.c +++ 
b/clang/test/Driver/global-isel.c @@ -6,6 +6,7 @@ // RUN: %clang -target aarch64 -fglobal-isel -S %s -### 2>&1 | FileCheck --check-prefix=ARM64-DEFAULT %s // RUN: %clang -target aarch64 -fglobal-isel -S -O0 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O0 %s // RUN: %clang -target aarch64 -fglobal-isel -S -O2 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O2 %s +// RUN: %clang -arch arm64 -fglobal-isel -S -O2 %s -### 2>&1 | FileCheck --check-prefixes=DARWIN-ARM64-O2,ENABLED %s // RUN: %clang -target aarch64 -fglobal-isel -Wno-global-isel -S -O2 %s -### 2>&1 | FileCheck --check-prefix=ARM64-O2-NOWARN %s // RUN: %clang -target x86_64 -fglobal-isel -S %s -### 2>&1 | FileCheck --check-prefix=X86_64 %s @@ -27,6 +28,7 @@ // ARM64-DEFAULT-NOT: warning: -fglobal-isel // ARM64-DEFAULT-NOT: "-global-isel-abort=2" // ARM64-O0-NOT: warning: -fglobal-isel +// DARWIN-ARM64-O2-NOT: warning: -fglobal-isel // ARM64-O2: warning: -fglobal-isel support is incomplete for this architecture at the current optimization level // ARM64-O2: "-mllvm" "-global-isel-abort=2" // ARM64-O2-NOWARN-NOT: warning: -fglobal-isel diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c index 130bcfbe69150..c5ce72c6de5d2 100644 --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -115,7 +115,7 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -opaque-pointers -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module -mllvm -opaque-pointers --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: -linker-path /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP // HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" // HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr 
@.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 51f3ea3bc5457..b2d73f3621087 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -115,8 +115,8 @@ // RUN: --image=file=%S/Inputs/dummy-elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out -// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -linker-path \ -// RUN: /usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP +// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP // HIP: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.out {{.*}}.o // HIP: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx90a -o {{.*}}.out {{.*}}.o @@ -134,6 +134,12 @@ // LINKER_ARGS: lld{{.*}}-flavor gnu --no-undefined -shared -plugin-opt=-amdgpu-internalize-symbols -plugin-opt=mcpu=gfx908 -o {{.*}}.out {{.*}}.o a // LINKER_ARGS: nvlink{{.*}}-m64 -o {{.*}}.out -arch sm_70 {{.*}}.o a b +// RUN: not clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -ldummy \ +// RUN: --linker-path=/usr/bin/ld --device-linker=a --device-linker=nvptx64-nvidia-cuda=b -- \ +// RUN: -o a.out 2>&1 | FileCheck %s --check-prefix=MISSING-LIBRARY + +// MISSING-LIBRARY: error: unable to find library -ldummy + /// Ensure that temp files aren't leftoever from static libraries. 
// RUN: clang-offload-packager -o %t-lib.out \ // RUN: --image=file=%S/Inputs/dummy-elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ diff --git a/clang/test/Driver/lld-repro.c b/clang/test/Driver/lld-repro.c index 7436d1a1f59be..1333f68d911ee 100644 --- a/clang/test/Driver/lld-repro.c +++ b/clang/test/Driver/lld-repro.c @@ -1,22 +1,28 @@ // REQUIRES: lld // UNSUPPORTED: ps4, ps5 -// RUN: not %clang %s -nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t -fcrash-diagnostics=all 2>&1 \ +// RUN: echo "-nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t" \ +// RUN: | sed -e 's/\\/\\\\/g' > %t.rsp + +// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=all 2>&1 \ +// RUN: | FileCheck %s + +// Test that the reproducer can still be created even when the input source cannot be preprocessed +// again, like when reading from stdin. +// RUN: not %clang -x c - @%t.rsp -fcrash-diagnostics=all 2>&1 < %s \ // RUN: | FileCheck %s // check that we still get lld's output // CHECK: error: undefined symbol: {{_?}}a // CHECK: Preprocessed source(s) and associated run script(s) are located at: -// CHECK-NEXT: note: diagnostic msg: {{.*}}lld-repro-{{.*}}.c // CHECK-NEXT: note: diagnostic msg: {{.*}}linker-crash-{{.*}}.tar -// CHECK-NEXT: note: diagnostic msg: {{.*}}lld-repro-{{.*}}.sh // CHECK-NEXT: note: diagnostic msg: // CHECK: ******************** -// RUN: not %clang %s -nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t -fcrash-diagnostics=compiler 2>&1 \ +// RUN: not %clang %s @%t.rsp -fcrash-diagnostics=compiler 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-LINKER -// RUN: not %clang %s -nostartfiles -nostdlib -fuse-ld=lld -gen-reproducer=error -fcrash-diagnostics-dir=%t 2>&1 \ +// RUN: not %clang %s @%t.rsp 2>&1 \ // RUN: | FileCheck %s --check-prefix=NO-LINKER // NO-LINKER-NOT: Preprocessed source(s) and associated run script(s) are located at: diff --git 
a/clang/test/Driver/pgo-sample-use-profi.c b/clang/test/Driver/pgo-sample-use-profi.c new file mode 100644 index 0000000000000..454a511a06281 --- /dev/null +++ b/clang/test/Driver/pgo-sample-use-profi.c @@ -0,0 +1,4 @@ +/// Test if profi flag is enabled in frontend as user-facing feature. +// RUN: %clang -c -fsample-profile-use-profi -fprofile-sample-use=/dev/null -### %s 2>&1 | FileCheck %s + +// CHECK: "-mllvm" "-sample-profile-use-profi" diff --git a/clang/test/Driver/response-file-errs.c b/clang/test/Driver/response-file-errs.c new file mode 100644 index 0000000000000..0fd03ed08c04a --- /dev/null +++ b/clang/test/Driver/response-file-errs.c @@ -0,0 +1,18 @@ +// AIX reacts to opening a directory differently than other systems. +// XFAIL: system-aix + +// If response file does not exist, '@file' directive remains unexpanded in +// command line. +// +// RUN: %clang @%S/Inputs/inexistent.rsp -### 2>&1 | FileCheck --check-prefix=INEXISTENT %s +// INEXISTENT: @{{.*}}Inputs/inexistent.rsp + +// As the above case but '@file' is in response file. +// +// RUN: %clang @%S/Inputs/inc-inexistent.rsp -### 2>&1 | FileCheck --check-prefix=INEXISTENT2 %s +// INEXISTENT2: @{{.*}}inexistent.txt + +// If file in `@file` is a directory, it is an error. +// +// RUN: not %clang @%S/Inputs -### 2>&1 | FileCheck --check-prefix=DIRECTORY %s +// DIRECTORY: cannot not open file '{{.*}}Inputs': {{[Ii]}}s a directory diff --git a/clang/test/Driver/riscv-cpus.c b/clang/test/Driver/riscv-cpus.c index 4858455d5775f..32d8569fe06fb 100644 --- a/clang/test/Driver/riscv-cpus.c +++ b/clang/test/Driver/riscv-cpus.c @@ -7,6 +7,10 @@ // MCPU-ROCKET64: "-nostdsysteminc" "-target-cpu" "rocket-rv64" // MCPU-ROCKET64: "-target-feature" "+64bit" +// We cannot check much for -mcpu=native, but it should be replaced by a valid CPU string. 
+// RUN: %clang --target=riscv64 -### -c %s 2>&1 -mcpu=native | FileCheck -check-prefix=MCPU-NATIVE %s +// MCPU-NATIVE-NOT: "-target-cpu" "native" + // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mtune=rocket-rv32 | FileCheck -check-prefix=MTUNE-ROCKET32 %s // MTUNE-ROCKET32: "-tune-cpu" "rocket-rv32" @@ -26,6 +30,10 @@ // RUN: %clang --target=riscv64 -### -c %s 2>&1 -mtune=rocket | FileCheck -check-prefix=MTUNE-ROCKET-64 %s // MTUNE-ROCKET-64: "-tune-cpu" "rocket" +// We cannot check much for -mtune=native, but it should be replaced by a valid CPU string. +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -mtune=native | FileCheck -check-prefix=MTUNE-NATIVE %s +// MTUNE-NATIVE-NOT: "-tune-cpu" "native" + // mcpu with default march // RUN: %clang --target=riscv64 -### -c %s 2>&1 -mcpu=sifive-e20 | FileCheck -check-prefix=MCPU-SIFIVE-E20 %s // MCPU-SIFIVE-E20: "-nostdsysteminc" "-target-cpu" "sifive-e20" @@ -130,10 +138,10 @@ // Check failed cases // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mcpu=generic-rv321 | FileCheck -check-prefix=FAIL-MCPU-NAME %s -// FAIL-MCPU-NAME: error: the clang compiler does not support '-mcpu=generic-rv321' +// FAIL-MCPU-NAME: error: unsupported argument 'generic-rv321' to option '-mcpu=' // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mcpu=generic-rv32 -march=rv64i | FileCheck -check-prefix=MISMATCH-ARCH %s -// MISMATCH-ARCH: error: the clang compiler does not support '-mcpu=generic-rv32' +// MISMATCH-ARCH: error: unsupported argument 'generic-rv32' to option '-mcpu=' // RUN: %clang --target=riscv32 -### -c %s 2>&1 -mcpu=generic-rv64 | FileCheck -check-prefix=MISMATCH-MCPU %s -// MISMATCH-MCPU: error: the clang compiler does not support '-mcpu=generic-rv64' +// MISMATCH-MCPU: error: unsupported argument 'generic-rv64' to option '-mcpu=' diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c index 46d8d5da32356..6b8dcd79faffc 100644 --- a/clang/test/Driver/x86-march.c +++ b/clang/test/Driver/x86-march.c @@ -88,6 
+88,14 @@ // RUN: | FileCheck %s -check-prefix=alderlake // alderlake: "-target-cpu" "alderlake" // +// RUN: %clang --target=x86_64 -c -### %s -march=raptorlake 2>&1 \ +// RUN: | FileCheck %s -check-prefix=raptorlake +// raptorlake: "-target-cpu" "raptorlake" +// +// RUN: %clang --target=x86_64 -c -### %s -march=meteorlake 2>&1 \ +// RUN: | FileCheck %s -check-prefix=meteorlake +// meteorlake: "-target-cpu" "meteorlake" +// // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=lakemont 2>&1 \ // RUN: | FileCheck %s -check-prefix=lakemont // lakemont: "-target-cpu" "lakemont" diff --git a/clang/test/ExtractAPI/anonymous_record_no_typedef.c b/clang/test/ExtractAPI/anonymous_record_no_typedef.c index e20abfdd86ab4..abb96db058dbf 100644 --- a/clang/test/ExtractAPI/anonymous_record_no_typedef.c +++ b/clang/test/ExtractAPI/anonymous_record_no_typedef.c @@ -56,22 +56,26 @@ struct Vehicle { { "kind": "memberOf", "source": "c:@S@Vehicle@E@input.h@64@Bicycle", - "target": "c:@S@Vehicle@E@input.h@64" + "target": "c:@S@Vehicle@E@input.h@64", + "targetFallback": "Vehicle::enum (unnamed)" }, { "kind": "memberOf", "source": "c:@S@Vehicle@E@input.h@64@Car", - "target": "c:@S@Vehicle@E@input.h@64" + "target": "c:@S@Vehicle@E@input.h@64", + "targetFallback": "Vehicle::enum (unnamed)" }, { "kind": "memberOf", "source": "c:@S@Vehicle@FI@type", - "target": "c:@S@Vehicle" + "target": "c:@S@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@S@Vehicle@FI@information", - "target": "c:@S@Vehicle" + "target": "c:@S@Vehicle", + "targetFallback": "Vehicle" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/enum.c b/clang/test/ExtractAPI/enum.c index 07d848082981f..7b345464cb982 100644 --- a/clang/test/ExtractAPI/enum.c +++ b/clang/test/ExtractAPI/enum.c @@ -65,57 +65,68 @@ enum { { "kind": "memberOf", "source": "c:@E@Vehicle@Bicycle", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": 
"c:@E@Vehicle@Car", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Train", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Ship", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Vehicle@Airplane", - "target": "c:@E@Vehicle" + "target": "c:@E@Vehicle", + "targetFallback": "Vehicle" }, { "kind": "memberOf", "source": "c:@E@Direction@North", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@East", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@South", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@E@Direction@West", - "target": "c:@E@Direction" + "target": "c:@E@Direction", + "targetFallback": "Direction" }, { "kind": "memberOf", "source": "c:@Ea@Constant@Constant", - "target": "c:@Ea@Constant" + "target": "c:@Ea@Constant", + "targetFallback": "enum (unnamed)" }, { "kind": "memberOf", "source": "c:@Ea@OtherConstant@OtherConstant", - "target": "c:@Ea@OtherConstant" + "target": "c:@Ea@OtherConstant", + "targetFallback": "enum (unnamed)" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_category.m b/clang/test/ExtractAPI/objc_category.m index 56bac43a11cdc..185016dfe848c 100644 --- a/clang/test/ExtractAPI/objc_category.m +++ b/clang/test/ExtractAPI/objc_category.m @@ -54,22 +54,26 @@ + (void)ClassMethod; { "kind": "memberOf", "source": "c:objc(cs)Interface(im)InstanceMethod", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(cm)ClassMethod", - 
"target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)Property", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "conformsTo", "source": "c:objc(cs)Interface", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_interface.m b/clang/test/ExtractAPI/objc_interface.m index 740a215400d9c..159e97a193a13 100644 --- a/clang/test/ExtractAPI/objc_interface.m +++ b/clang/test/ExtractAPI/objc_interface.m @@ -57,37 +57,44 @@ - (char)getIvar; { "kind": "memberOf", "source": "c:objc(cs)Super(cm)getWithProperty:", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "memberOf", "source": "c:objc(cs)Super(im)setProperty:andOtherThing:", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "memberOf", "source": "c:objc(cs)Super(py)Property", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" }, { "kind": "conformsTo", "source": "c:objc(cs)Super", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" }, { "kind": "memberOf", "source": "c:objc(cs)Derived@Ivar", - "target": "c:objc(cs)Derived" + "target": "c:objc(cs)Derived", + "targetFallback": "Derived" }, { "kind": "memberOf", "source": "c:objc(cs)Derived(im)getIvar", - "target": "c:objc(cs)Derived" + "target": "c:objc(cs)Derived", + "targetFallback": "Derived" }, { "kind": "inheritsFrom", "source": "c:objc(cs)Derived", - "target": "c:objc(cs)Super" + "target": "c:objc(cs)Super", + "targetFallback": "Super" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_property.m b/clang/test/ExtractAPI/objc_property.m index 1b50950d44243..f09a5ad724238 100644 --- 
a/clang/test/ExtractAPI/objc_property.m +++ b/clang/test/ExtractAPI/objc_property.m @@ -55,37 +55,44 @@ @interface Interface (Category) { "kind": "memberOf", "source": "c:objc(cs)Interface(cpy)myInterfaceTypeProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)myInterfaceInstanceProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(cpy)myCategoryTypeProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "memberOf", "source": "c:objc(cs)Interface(py)myCategoryInstanceProp", - "target": "c:objc(cs)Interface" + "target": "c:objc(cs)Interface", + "targetFallback": "Interface" }, { "kind": "conformsTo", "source": "c:objc(cs)Interface", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" }, { "kind": "memberOf", "source": "c:objc(pl)Protocol(cpy)myProtocolTypeProp", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" }, { "kind": "memberOf", "source": "c:objc(pl)Protocol(py)myProtocolInstanceProp", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/objc_protocol.m b/clang/test/ExtractAPI/objc_protocol.m index 036850924587c..d9a65f419df89 100644 --- a/clang/test/ExtractAPI/objc_protocol.m +++ b/clang/test/ExtractAPI/objc_protocol.m @@ -49,7 +49,8 @@ @protocol AnotherProtocol { "kind": "conformsTo", "source": "c:objc(pl)AnotherProtocol", - "target": "c:objc(pl)Protocol" + "target": "c:objc(pl)Protocol", + "targetFallback": "Protocol" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/struct.c b/clang/test/ExtractAPI/struct.c index 516055768749c..7e93f0d7e7bfa 100644 --- 
a/clang/test/ExtractAPI/struct.c +++ b/clang/test/ExtractAPI/struct.c @@ -52,22 +52,26 @@ struct Color { { "kind": "memberOf", "source": "c:@S@Color@FI@Red", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Green", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Blue", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" }, { "kind": "memberOf", "source": "c:@S@Color@FI@Alpha", - "target": "c:@S@Color" + "target": "c:@S@Color", + "targetFallback": "Color" } ], "symbols": [ diff --git a/clang/test/ExtractAPI/underscored.c b/clang/test/ExtractAPI/underscored.c index 47f1893cdb029..6eeaf1ce412c3 100644 --- a/clang/test/ExtractAPI/underscored.c +++ b/clang/test/ExtractAPI/underscored.c @@ -65,7 +65,8 @@ typedef _HiddenTypedef ExposedTypedefToHidden; { "kind": "memberOf", "source": "c:@S@ExposedRecord@FI@a", - "target": "c:@S@ExposedRecord" + "target": "c:@S@ExposedRecord", + "targetFallback": "ExposedRecord" } ], "symbols": [ diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index f2071c866956f..1ac86709cab3f 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -5,27 +5,27 @@ // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 // AARCH64: error: unknown target CPU 'not-a-cpu' -// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, 
apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, grace{{$}} +// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, grace{{$}} // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' -// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, grace{{$}} +// TUNE_AARCH64-NEXT: note: valid target CPU values are: 
cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, grace{{$}} // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' -// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} +// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, 
goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64 // X86_64: error: unknown target CPU 'not-a-cpu' -// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} +// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, 
znver3, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}} // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86 // TUNE_X86: error: unknown target CPU 'not-a-cpu' -// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} +// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu 
-fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64 // TUNE_X86_64: error: unknown target CPU 'not-a-cpu' -// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} +// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, x86-64, geode{{$}} // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' 
diff --git a/clang/test/Modules/pair-unambiguous-ctor.cppm b/clang/test/Modules/pair-unambiguous-ctor.cppm index 8022f34f3aafa..eb242244260cb 100644 --- a/clang/test/Modules/pair-unambiguous-ctor.cppm +++ b/clang/test/Modules/pair-unambiguous-ctor.cppm @@ -14,7 +14,9 @@ // expected-no-diagnostics module; #include "config.h" +# 3 "pair-unambiguous-ctor.cppm" 1 3 export module std:M; +# 3 "pair-unambiguous-ctor.cppm" 2 3 import :string; import :algorithm; @@ -25,15 +27,19 @@ auto check() { //--- string.cppm module; #include "string.h" +# 28 "pair-unambiguous-ctor.cppm" 1 3 export module std:string; export namespace std { using std::string; } +# 28 "pair-unambiguous-ctor.cppm" 2 3 //--- algorithm.cppm module; #include "algorithm.h" +# 38 "pair-unambiguous-ctor.cppm" 1 3 export module std:algorithm; +# 38 "pair-unambiguous-ctor.cppm" 2 3 //--- pair.h namespace std __attribute__ ((__visibility__ ("default"))) diff --git a/clang/test/Modules/reserved-names-1.cpp b/clang/test/Modules/reserved-names-1.cpp new file mode 100644 index 0000000000000..fd636ab0630a2 --- /dev/null +++ b/clang/test/Modules/reserved-names-1.cpp @@ -0,0 +1,45 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s + +// expected-note@1 15{{add 'module;' to the start of the file to introduce a global module fragment}} + +module std; // expected-error {{'std' is a reserved name for a module}} +module _Test; // expected-error {{'_Test' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +module module; // expected-error {{'module' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +module std0; // expected-error {{'std0' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} + +export module module; // expected-error {{'module' is an invalid name for a module}} \ + expected-error {{module 
declaration must occur at the start of the translation unit}} +export module import; // expected-error {{'import' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module _Test; // expected-error {{'_Test' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module __test; // expected-error {{'__test' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module te__st; // expected-error {{'te__st' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module std; // expected-error {{'std' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module std.foo;// expected-error {{'std' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module std0; // expected-error {{'std0' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module std1000000; // expected-error {{'std1000000' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +export module should_fail._Test; // expected-error {{'_Test' is a reserved name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} + +// Show that being in a system header doesn't save you from diagnostics about +// use of an invalid module-name identifier. 
+# 34 "reserved-names-1.cpp" 1 3 +export module module; // expected-error {{'module' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} + +export module _Test.import; // expected-error {{'import' is an invalid name for a module}} \ + expected-error {{module declaration must occur at the start of the translation unit}} +# 39 "reserved-names-1.cpp" 2 3 + +// We can still use a reserved name on import. +import std; // expected-error {{module 'std' not found}} diff --git a/clang/test/Modules/reserved-names-2.cpp b/clang/test/Modules/reserved-names-2.cpp new file mode 100644 index 0000000000000..6979e92f37765 --- /dev/null +++ b/clang/test/Modules/reserved-names-2.cpp @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Demonstrate that we don't consider use of 'std' followed by digits to be a +// reserved identifier if it is not the first part of the path. +export module should_succeed.std0; diff --git a/clang/test/Modules/reserved-names-3.cpp b/clang/test/Modules/reserved-names-3.cpp new file mode 100644 index 0000000000000..b2e155e8d3610 --- /dev/null +++ b/clang/test/Modules/reserved-names-3.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Demonstrate that we don't consider use of 'std' (potentially followed by +// zero or more digits) to be a reserved identifier if it is not the only part +// of the path. +export module std12Three; diff --git a/clang/test/Modules/reserved-names-4.cpp b/clang/test/Modules/reserved-names-4.cpp new file mode 100644 index 0000000000000..73df48b76de8d --- /dev/null +++ b/clang/test/Modules/reserved-names-4.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Demonstrate that we don't consider use of 'std' a reserved identifier if it +// is not the first part of the path. 
+export module should_succeed.std; + diff --git a/clang/test/Modules/reserved-names-system-header-1.cpp b/clang/test/Modules/reserved-names-system-header-1.cpp new file mode 100644 index 0000000000000..2db4c08add1d9 --- /dev/null +++ b/clang/test/Modules/reserved-names-system-header-1.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Show that we suppress the reserved identifier diagnostic in a system header. +# 100 "file.cpp" 1 3 // Enter a system header +export module std; +# 100 "file.cpp" 2 3 // Leave the system header diff --git a/clang/test/Modules/reserved-names-system-header-2.cpp b/clang/test/Modules/reserved-names-system-header-2.cpp new file mode 100644 index 0000000000000..2087f487721cb --- /dev/null +++ b/clang/test/Modules/reserved-names-system-header-2.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +// Show that we suppress the reserved identifier diagnostic in a system header. 
+# 100 "file.cpp" 1 3 // Enter a system header +export module __test; +# 100 "file.cpp" 2 3 // Leave the system header diff --git a/clang/test/OpenMP/Inputs/multiple_regions.inc b/clang/test/OpenMP/Inputs/multiple_regions.inc new file mode 100644 index 0000000000000..f519bee6c5b5e --- /dev/null +++ b/clang/test/OpenMP/Inputs/multiple_regions.inc @@ -0,0 +1,4 @@ +#pragma omp target +{ + i = i + VALUE; +} diff --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp index 0b2de036ce119..e06503077d66b 100644 --- a/clang/test/OpenMP/barrier_codegen.cpp +++ b/clang/test/OpenMP/barrier_codegen.cpp @@ -45,7 +45,7 @@ int main(int argc, char **argv) { // CLANGCG: declare i32 @__kmpc_global_thread_num(ptr) // IRBUILDER: ; Function Attrs: nounwind // IRBUILDER-NEXT: declare i32 @__kmpc_global_thread_num(ptr) # -// IRBUILDER_OPT: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +// IRBUILDER_OPT: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) // IRBUILDER_OPT-NEXT: declare i32 @__kmpc_global_thread_num(ptr nocapture nofree readonly) # // CHECK: define {{.+}} [[TMAIN_INT]]( diff --git a/clang/test/OpenMP/error_ast_print.cpp b/clang/test/OpenMP/error_ast_print.cpp new file mode 100644 index 0000000000000..fbdf68a11634d --- /dev/null +++ b/clang/test/OpenMP/error_ast_print.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void foo() {} +// CHECK: template int tmain(T argc, 
char **argv) +// CHECK: static int a; +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: a = argv[0][0]; +// CHECK-NEXT: ++a; +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: { +// CHECK-NEXT: int b = 10; +// CHECK-NEXT: T c = 100; +// CHECK-NEXT: a = b + c; +// CHECK-NEXT: } +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: foo(); +// CHECK-NEXT: return N; + +template +int tmain(T argc, char **argv) { + T b = argc, c, d, e, f, g; + static int a; +#pragma omp error + a = argv[0][0]; + ++a; +#pragma omp error + { + int b = 10; + T c = 100; + a = b + c; + } +#pragma omp error + foo(); +return N; +} + +// CHECK: int main(int argc, char **argv) +// CHECK-NEXT: int b = argc, c, d, e, f, g; +// CHECK-NEXT: static int a; +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: a = 2; +// CHECK-NEXT: #pragma omp error +// CHECK-NEXT: foo(); +int main (int argc, char **argv) { + int b = argc, c, d, e, f, g; + static int a; +#pragma omp error + a=2; +#pragma omp error + foo(); +} +#endif diff --git a/clang/test/OpenMP/error_message.cpp b/clang/test/OpenMP/error_message.cpp new file mode 100644 index 0000000000000..3f5a4cc243eb3 --- /dev/null +++ b/clang/test/OpenMP/error_message.cpp @@ -0,0 +1,114 @@ +// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized + +// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized + +template +T tmain(T argc) { + if (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + if (argc) { +#pragma omp error + } + while (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + while (argc) { +#pragma omp error + } + do +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + while (argc) + ; + do { +#pragma omp error + } while (argc); + switch (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + switch (argc) + case 1: +#pragma 
omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + switch (argc) + case 1: { +#pragma omp error + } + switch (argc) { +#pragma omp error + case 1: +#pragma omp error + break; + default: { +#pragma omp error + } break; + } + for (;;) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + for (;;) { +#pragma omp error + } +label: +#pragma omp error +label1 : { +#pragma omp error +} +if (1) + label2: +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + + return T(); +} + +int main(int argc, char **argv) { +#pragma omp error + ; +#pragma omp error untied // expected-error {{unexpected OpenMP clause 'untied' in directive '#pragma omp error'}} +#pragma omp error unknown // expected-warning {{extra tokens at the end of '#pragma omp error' are ignored}} + if (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + if (argc) { +#pragma omp error + } + while (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + while (argc) { +#pragma omp error + } + do +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + while (argc) + ; + do { +#pragma omp error + } while (argc); + switch (argc) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + switch (argc) + case 1: +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + switch (argc) + case 1: { +#pragma omp error + } + switch (argc) { +#pragma omp error + case 1: +#pragma omp error + break; + default: { +#pragma omp error + } break; + } + for (;;) +#pragma omp error // expected-error {{'#pragma omp error' cannot be an immediate substatement}} + for (;;) { +#pragma omp error + } +label: +#pragma omp error +label1 : { +#pragma omp error +} +if (1) + label2: +#pragma omp error 
// expected-error {{'#pragma omp error' cannot be an immediate substatement}} + + return tmain(argc); +} diff --git a/clang/test/OpenMP/irbuilder_simd_aligned.cpp b/clang/test/OpenMP/irbuilder_simd_aligned.cpp index 6af2f7385e62e..e749cb9cfa778 100644 --- a/clang/test/OpenMP/irbuilder_simd_aligned.cpp +++ b/clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -164,7 +164,7 @@ void simple(float *a, float *b, int *c) { //. // CHECK: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } // CHECK: attributes #1 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" } -// CHECK: attributes #2 = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +// CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } //. 
// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 50} diff --git a/clang/test/OpenMP/multiple_regions_per_line.cpp b/clang/test/OpenMP/multiple_regions_per_line.cpp new file mode 100644 index 0000000000000..4332ca052edce --- /dev/null +++ b/clang/test/OpenMP/multiple_regions_per_line.cpp @@ -0,0 +1,73 @@ +//RUN: %clang_cc1 -verify -x c++ -triple x86_64 -fopenmp -fopenmp-version=51 \ +//RUN: -fopenmp-targets=x86_64 -I%S/Inputs -emit-llvm -o - %s | FileCheck %s + +//RUN: %clang_cc1 -x c++ -triple x86_64 -fopenmp -fopenmp-version=51 \ +//RUN: -fopenmp-targets=x86_64 -I%S/Inputs -emit-llvm-bc -o %t-host.bc %s + +//RUN: %clang_cc1 -x c++ -triple x86_64 -fopenmp -fopenmp-version=51 \ +//RUN: -fopenmp-targets=x86_64 -I%S/Inputs -fopenmp-is-device \ +//RUN: -fopenmp-host-ir-file-path %t-host.bc -emit-llvm -o - %s \ +//RUN: | FileCheck %s --check-prefix=TCHECK + +// expected-no-diagnostics + +//CHECK: define {{.*}}void @[[FOO:.+]]( +void foo() { + int i = 0; + +//CHECK: call void @__omp_offloading_[[FILEID1:[0-9a-f]+_[0-9a-f]+]]_[[FOO]]_l[[T1L:[0-9]+]]( + +#define VALUE 1 +#include "multiple_regions.inc" + +//CHECK: call void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_1( +#undef VALUE +#define VALUE 2 +#include "multiple_regions.inc" + +//CHECK: call void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_2( +#undef VALUE +#define VALUE 3 +#include "multiple_regions.inc" +} + +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]( +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_1( +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_2( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID1:[0-9a-f]+_[0-9a-f]+]]_[[FOO:.+]]_l[[T1L:[0-9]+]]( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_1( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID1]]_[[FOO]]_l[[T1L]]_2( + +#define A()\ +_Pragma("omp target")\ +{}\ +_Pragma("omp target")\ +{} + 
+//CHECK: define {{.*}}void @[[BAR:.+]]( +void bar() +{ +//CHECK: call void @__omp_offloading_[[FILEID2:[0-9a-f]+_[0-9a-f]+]]_[[BAR]]_l[[T2L:[0-9]+]]( +//CHECK: call void @__omp_offloading_[[FILEID2]]_[[BAR]]_l[[T2L]]_1( + A() +} + +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID2]]_[[BAR]]_l[[T2L]]( +//CHECK: define {{.*}}void @__omp_offloading_[[FILEID2]]_[[BAR]]_l[[T2L]]_1( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID2:[0-9a-f]+_[0-9a-f]+]]_[[BAR:.+]]_l[[T2L:[0-9]+]]( +//TCHECK: define {{.*}}void @__omp_offloading_[[FILEID2]]_[[BAR]]_l[[T2L]]_1( + +// Check metadata is properly generated: +// CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 1, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 2, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[BAR]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[BAR]]", i32 [[T2L]], i32 1, i32 {{[0-9]+}}} + +// TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 1, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[FOO]]", i32 [[T1L]], i32 2, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[BAR]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[BAR]]", i32 [[T2L]], i32 1, i32 {{[0-9]+}}} diff --git a/clang/test/OpenMP/omp_with_loop_pragma_instr_profile.c 
b/clang/test/OpenMP/omp_with_loop_pragma_instr_profile.c index 9667f9cc549d3..25bfbb7c815de 100644 --- a/clang/test/OpenMP/omp_with_loop_pragma_instr_profile.c +++ b/clang/test/OpenMP/omp_with_loop_pragma_instr_profile.c @@ -17,6 +17,6 @@ void sub(double *restrict a, double *restrict b, int n) { // CHECK: omp.precond.then: // CHECK-NEXT: call void @llvm.instrprof.increment( // CHECK: cond.true: -// CEHCK-NEXT: call void @llvm.instrprof.increment( +// CHECK-NEXT: call void @llvm.instrprof.increment( // CHECK: omp.inner.for.body: // CHECK-NEXT: call void @llvm.instrprof.increment( diff --git a/clang/test/OpenMP/target_codegen_registration.cpp b/clang/test/OpenMP/target_codegen_registration.cpp index cd0d82e223c3f..a82af4dcb02dd 100644 --- a/clang/test/OpenMP/target_codegen_registration.cpp +++ b/clang/test/OpenMP/target_codegen_registration.cpp @@ -403,31 +403,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 205, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 255, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 271, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 277, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 288, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 294, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 398, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 300, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], 
!"_ZN2STILi1000EEC1Ev", i32 294, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 300, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 288, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 230, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 205, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 255, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 277, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 288, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 294, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 398, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 300, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 294, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 300, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 288, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 230, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 205, i32 {{[0-9]+}}} -// 
TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 255, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 271, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 277, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 288, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 294, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 398, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 300, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 294, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 300, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 288, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 230, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 205, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 255, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 277, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 288, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 294, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 398, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 300, i32 0, i32 
{{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 294, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 300, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 288, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 230, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_codegen_registration_naming.cpp b/clang/test/OpenMP/target_codegen_registration_naming.cpp index 8bfabfdebb295..5814c37363ae3 100644 --- a/clang/test/OpenMP/target_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_codegen_registration_naming.cpp @@ -75,10 +75,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_has_device_addr_codegen.cpp b/clang/test/OpenMP/target_has_device_addr_codegen.cpp index ddca5b9d64d49..931346a95e4cf 
100644 --- a/clang/test/OpenMP/target_has_device_addr_codegen.cpp +++ b/clang/test/OpenMP/target_has_device_addr_codegen.cpp @@ -350,11 +350,10 @@ void use_template() { // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP20]], align 8 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 // CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP21]], align 8 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 // CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 // CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 // CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 @@ -422,11 +421,10 @@ void use_template() { // CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[RAA]], align 8 // CHECK-NEXT: store ptr [[TMP52]], ptr [[_TMP13]], align 8 // CHECK-NEXT: [[TMP53:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK-NEXT: [[TMP54:%.*]] = load ptr, ptr [[_TMP13]], align 8 // CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP55]], align 8 +// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP55]], align 8 // CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP56]], align 8 +// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP56]], align 8 // CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS16]], i64 0, i64 0 // 
CHECK-NEXT: store ptr null, ptr [[TMP57]], align 8 // CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 @@ -491,11 +489,10 @@ void use_template() { // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(ptr [[H]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT25]] // CHECK: omp_offload.cont25: -// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [5 x i32], ptr [[DA]], i64 0, i64 0 // CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[TMP87]], align 8 +// CHECK-NEXT: store ptr [[DA]], ptr [[TMP87]], align 8 // CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[TMP88]], align 8 +// CHECK-NEXT: store ptr [[DA]], ptr [[TMP88]], align 8 // CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0 // CHECK-NEXT: store ptr null, ptr [[TMP89]], align 8 // CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 @@ -684,11 +681,10 @@ void use_template() { // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP20]], align 8 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 // CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP21]], align 8 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 // CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 
x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 // CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 // CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 @@ -859,11 +855,10 @@ void use_template() { // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP20]], align 8 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 // CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP19]], ptr [[TMP21]], align 8 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 // CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 // CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 // CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 diff --git a/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp b/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp index cd3ab5fe04ce3..e55b8a4ac521f 100644 --- a/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp +++ b/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp @@ -62,80 +62,77 @@ int main() { // CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x float], ptr [[ARR]], i64 0, i64 0 -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[A]], ptr [[TMP8]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[A]], ptr [[TMP10]], align 8 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8 -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 -// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A]], ptr [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP12]], 
align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[ARR]], ptr [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[ARR]], ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 // CHECK-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[TMP23]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[TMP25]], align 8 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP28]], align 8 -// 
CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP30]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 -// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK-NEXT: store ptr [[VLA]], ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK-NEXT: store ptr [[VLA]], ptr [[TMP22]], align 8 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK-NEXT: store ptr null, ptr [[TMP23]], 
align 8 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4 -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK-NEXT: store i32 6, ptr [[TMP41]], align 4 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP45]], align 8 -// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8 -// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8 -// CHECK-NEXT: [[TMP49:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.region_id, ptr [[KERNEL_ARGS]]) -// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 -// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-NEXT: store i32 6, ptr [[TMP27]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP31]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP34]], align 8 +// CHECK-NEXT: [[TMP35:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27.region_id, ptr [[KERNEL_ARGS]]) +// CHECK-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK: omp_offload.failed: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27(ptr [[A]], ptr [[TMP4]], ptr [[TMP5]], ptr [[ARR]], i64 [[TMP1]], ptr [[VLA]]) #[[ATTR5:[0-9]+]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK: omp_offload.cont: -// CHECK-NEXT: [[TMP51:%.*]] = load float, ptr [[A]], align 4 -// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[TMP51]] to i32 +// CHECK-NEXT: [[TMP37:%.*]] = load float, ptr [[A]], align 4 +// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[TMP37]] to i32 // CHECK-NEXT: store i32 [[CONV1]], ptr [[RETVAL]], align 4 -// CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK-NEXT: call void @llvm.stackrestore(ptr [[TMP52]]) -// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK-NEXT: ret i32 [[TMP53]] +// CHECK-NEXT: [[TMP38:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK-NEXT: call void @llvm.stackrestore(ptr [[TMP38]]) +// CHECK-NEXT: [[TMP39:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP39]] // // // CHECK-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -164,68 +161,68 @@ int main() { // CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 1 // CHECK-NEXT: [[ARR:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 3 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr [4 x i32], ptr [[ARR]], i32 1 -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 -// CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] -// CHECK-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// 
CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 +// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[A]] to i64 +// CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] +// CHECK-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 40, i1 false) -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP9]], align 8 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 // CHECK-NEXT: store ptr [[A]], ptr [[TMP11]], align 8 -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP14]], align 8 -// 
CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP15]], align 8 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK-NEXT: store ptr [[A]], ptr [[TMP17]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK-NEXT: store ptr null, ptr [[TMP24]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP25]], align 8 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[PTR]], ptr [[TMP27]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK-NEXT: store ptr null, ptr [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK-NEXT: store ptr [[ARR]], ptr [[TMP32]], align 8 -// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK-NEXT: store ptr null, ptr [[TMP34]], align 8 -// CHECK-NEXT: [[TMP35:%.*]] = 
getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[PTR]], ptr [[TMP17]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK-NEXT: store ptr [[THIS1]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK-NEXT: store ptr [[ARR]], ptr [[TMP20]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 1, ptr [[TMP38]], align 4 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK-NEXT: store i32 5, ptr [[TMP39]], align 4 -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 8 -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP36]], ptr [[TMP41]], align 8 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK-NEXT: store ptr [[TMP37]], ptr [[TMP42]], align 8 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP43]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP46]], align 8 -// CHECK-NEXT: 
[[TMP47:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l14.region_id, ptr [[KERNEL_ARGS]]) -// CHECK-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 -// CHECK-NEXT: br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-NEXT: store i32 5, ptr [[TMP26]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP27]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8 +// CHECK-NEXT: [[TMP34:%.*]] = 
call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l14.region_id, ptr [[KERNEL_ARGS]]) +// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK: omp_offload.failed: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l14(ptr [[THIS1]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]] diff --git a/clang/test/OpenMP/target_parallel_codegen_registration.cpp b/clang/test/OpenMP/target_parallel_codegen_registration.cpp index 568d215e85336..27149109c8327 100644 --- a/clang/test/OpenMP/target_parallel_codegen_registration.cpp +++ b/clang/test/OpenMP/target_parallel_codegen_registration.cpp @@ -442,31 +442,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 
{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 
[[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 
[[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp b/clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp index b3fbf83625e96..9701478bba089 100644 --- a/clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_parallel_codegen_registration_naming.cpp @@ -75,10 +75,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index f6946165cfd3b..8f14afb70c674 100644 --- 
a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -112,18 +112,18 @@ int foo(int n) { a += 1; } - // CEHCK-32: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 0 - // CEHCK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPSIZEGEP]], ptr align 8 [[SIZET2]], i64 24, i1 false) - // CEHCK-32: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 1 - // CEHCK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPBPGEP]], ptr align 8 [[BPGEP]], i64 24, i1 false) - // CEHCK-32: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 2 - // CEHCK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPPGEP]], ptr align 8 [[BCAST]], i64 24, i1 false) - // CEHCK-64: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 0 - // CEHCK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPBPGEP]], ptr align 8 [[BPGEP]], i64 24, i1 false) - // CEHCK-64: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 1 - // CEHCK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPPGEP]], ptr align 8 [[BCAST]], i64 24, i1 false) - // CEHCK-64: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 2 - // CEHCK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPSIZEGEP]], ptr align 8 [[SIZET2]], i64 24, i1 false) + // CHECK-32: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPSIZEGEP]], ptr align 8 [[SIZET2]], i64 24, i1 false) + // CHECK-32: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPBPGEP]], ptr align 8 [[BPGEP]], i64 24, i1 false) + // CHECK-32: [[FPPGEP:%.+]] = 
getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-32: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPPGEP]], ptr align 8 [[BCAST]], i64 24, i1 false) + // CHECK-64: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPBPGEP]], ptr align 8 [[BPGEP]], i64 24, i1 false) + // CHECK-64: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPPGEP]], ptr align 8 [[BCAST]], i64 24, i1 false) + // CHECK-64: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], ptr [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-64: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[FPSIZEGEP]], ptr align 8 [[SIZET2]], i64 24, i1 false) int lin = 12; #pragma omp target parallel for if(target: 1) linear(lin, a : get_val()) nowait for (unsigned long long it = 2000; it >= 600; it-=400) { diff --git a/clang/test/OpenMP/target_parallel_for_codegen_registration.cpp b/clang/test/OpenMP/target_parallel_for_codegen_registration.cpp index f76aaf15d7455..6aefa86d3e9dd 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen_registration.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 
{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], 
!"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], 
!"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp b/clang/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp index 2afd858735a0f..a809cb1263402 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, 
!"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp index 4c3c15425b4a3..ff22cfa8b9a8b 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", 
i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 
[[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: 
= !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp index ba2d363dc455a..1eff0c1b0e65d 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_simd_codegen_registration.cpp b/clang/test/OpenMP/target_simd_codegen_registration.cpp index 0770d0d314c0c..d41dae2c39769 100644 --- a/clang/test/OpenMP/target_simd_codegen_registration.cpp 
+++ b/clang/test/OpenMP/target_simd_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} 
+// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 
{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_simd_codegen_registration_naming.cpp b/clang/test/OpenMP/target_simd_codegen_registration_naming.cpp index ef8676a7b9829..a87042fb1fe70 100644 --- a/clang/test/OpenMP/target_simd_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_simd_codegen_registration_naming.cpp @@ -77,10 +77,10 
@@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_codegen_registration.cpp b/clang/test/OpenMP/target_teams_codegen_registration.cpp index d79553e80ea64..70288706cff07 100644 --- a/clang/test/OpenMP/target_teams_codegen_registration.cpp +++ b/clang/test/OpenMP/target_teams_codegen_registration.cpp @@ -442,31 +442,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], 
!"_ZN2SED1Ev", i32 317, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 
[[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 295, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 311, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 
[[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 317, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 437, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 340, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 328, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 270, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_codegen_registration_naming.cpp b/clang/test/OpenMP/target_teams_codegen_registration_naming.cpp index daf4d158a91a7..35975abdad4ac 100644 --- a/clang/test/OpenMP/target_teams_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_teams_codegen_registration_naming.cpp @@ -75,10 +75,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, 
!"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_codegen_registration.cpp b/clang/test/OpenMP/target_teams_distribute_codegen_registration.cpp index 3913e43fef8ce..6cf8f4ec79f22 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen_registration.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 
348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 
[[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: 
= !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp b/clang/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp index 44c0f25aa9b12..d5312517f3397 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp index 4721dbdc4cbd2..1bba9942bbca7 100644 --- 
a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration.cpp @@ -452,31 +452,31 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, 
i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 
348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp index 
3a6473a1115f2..1e52f5441f8d6 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen_registration_naming.cpp @@ -77,10 +77,10 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} #endif diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp index 9181c5c23892b..cd6cf08ce455f 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration.cpp @@ -452,32 +452,32 @@ int bar(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 
[[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 
[[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID:-?[0-9]+]], 
i32 [[FILEID:-?[0-9]+]], !"_ZN2SB3fooEv", i32 245, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SDD1Ev", i32 297, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SEC1Ev", i32 315, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SED1Ev", i32 322, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_Z3bari", i32 446, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EEC1Ev", i32 341, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi1000EED1Ev", i32 348, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2STILi100EE3fooEv", i32 334, i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 [[DEVID]], i32 [[FILEID]], !"_ZN2SCC1Ev", i32 271, i32 0, i32 {{[0-9]+}}} // TCHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} // CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp index 0421345dab9fb..52783ef7bbaed 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen_registration_naming.cpp @@ -77,12 +77,12 @@ int nested(int a){ // Check metadata is properly generated: // CHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 
[[T1L]], i32 {{[0-9]+}}} -// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// CHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // TCHECK: !omp_offload.info = !{!{{[0-9]+}}, !{{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 {{[0-9]+}}} -// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T1L]], i32 0, i32 {{[0-9]+}}} +// TCHECK-DAG: = !{i32 0, i32 {{-?[0-9]+}}, i32 {{-?[0-9]+}}, !"[[NNAME]]", i32 [[T2L]], i32 0, i32 {{[0-9]+}}} // CHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} // TCHECK-DAG: !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/clang/test/Parser/lambda-attr.cu b/clang/test/Parser/lambda-attr.cu index 886212b97f50b..7fa128effd512 100644 --- a/clang/test/Parser/lambda-attr.cu +++ b/clang/test/Parser/lambda-attr.cu @@ -18,6 +18,10 @@ __attribute__((device)) void device_attr() { ([&](int) __attribute__((device)){ device_fn(); })(0); // expected-warning@-1 {{nvcc does not allow '__device__' to appear after the parameter list in lambdas}} ([&] __attribute__((device)) (int) { device_fn(); })(0); + + // test that noinline can appear anywhere. 
+ ([&] __attribute__((device)) __noinline__ () { device_fn(); })(); + ([&] __noinline__ __attribute__((device)) () { device_fn(); })(); } __attribute__((host)) __attribute__((device)) void host_device_attrs() { @@ -37,6 +41,11 @@ __attribute__((host)) __attribute__((device)) void host_device_attrs() { // expected-warning@-1 {{nvcc does not allow '__host__' to appear after the parameter list in lambdas}} // expected-warning@-2 {{nvcc does not allow '__device__' to appear after the parameter list in lambdas}} ([&] __attribute__((host)) __attribute__((device)) (int) { hd_fn(); })(0); + + // test that noinline can also appear anywhere. + ([] __attribute__((host)) __attribute__((device)) () { hd_fn(); })(); + ([] __attribute__((host)) __noinline__ __attribute__((device)) () { hd_fn(); })(); + ([] __attribute__((host)) __attribute__((device)) __noinline__ () { hd_fn(); })(); } // TODO: Add tests for __attribute__((global)) once we support global lambdas. diff --git a/clang/test/Preprocessor/init-arm.c b/clang/test/Preprocessor/init-arm.c index e317ffa67393d..a55d0d63a79b3 100644 --- a/clang/test/Preprocessor/init-arm.c +++ b/clang/test/Preprocessor/init-arm.c @@ -1450,3 +1450,8 @@ // THUMB-MINGW:#define __ARM_DWARF_EH__ 1 +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=thumbv6m-none-unknown-eabi < /dev/null | FileCheck -match-full-lines -check-prefix Thumbv6m-elf %s +// Thumbv6m-elf: #define __ELF__ 1 + +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=thumbv6m-none-unknown-eabi < /dev/null | FileCheck -match-full-lines -check-prefix Thumbv6m-cxx %s +// Thumbv6m-cxx: #define _GNU_SOURCE 1 diff --git a/clang/test/Preprocessor/is_target_unknown_environment.c b/clang/test/Preprocessor/is_target_unknown_environment.c new file mode 100644 index 0000000000000..9462ef442fecd --- /dev/null +++ b/clang/test/Preprocessor/is_target_unknown_environment.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -fsyntax-only -triple x86_64-apple-macos12 -verify %s + +// expected-no-diagnostics 
+ +#if !__is_target_environment(unknown) +#error "mismatching environment" +#endif + +#if __is_target_environment(simulator) || __is_target_environment(SIMULATOR) +#error "mismatching environment" +#endif + +#if __is_target_environment(invalidEnv) +#error "invalid environment must not be matched" +#endif diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 0ffa2739e5df4..10f0987a00399 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -1791,6 +1791,12 @@ // RUN: %clang -march=alderlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32 +// RUN: %clang -march=raptorlake -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32 +// RUN: %clang -march=meteorlake -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M32 // CHECK_ADL_M32: #define __ADX__ 1 // CHECK_ADL_M32: #define __AES__ 1 // CHECK_ADL_M32: #define __AVX2__ 1 @@ -1853,6 +1859,12 @@ // RUN: %clang -march=alderlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64 +// RUN: %clang -march=raptorlake -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64 +// RUN: %clang -march=meteorlake -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ADL_M64 // CHECK_ADL_M64: #define __ADX__ 1 // CHECK_ADL_M64: #define __AES__ 1 // CHECK_ADL_M64: #define __AVX2__ 1 diff --git a/clang/test/Sema/128bitfloat.cpp b/clang/test/Sema/128bitfloat.cpp index 6b9d63e6af4cf..b98b42496e8db 100644 --- a/clang/test/Sema/128bitfloat.cpp 
+++ b/clang/test/Sema/128bitfloat.cpp @@ -7,7 +7,7 @@ #if defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__) -#if defined(__ppc__) +#if defined(__powerpc__) template struct __is_float128 { static constexpr bool value = false; }; template <> struct __is_float128<__float128> { static constexpr bool value = true; }; static_assert(__is_float128<__ieee128>::value, "__ieee128 aliases to __float128"); @@ -45,7 +45,7 @@ int g(int x, __float128 *y) { // expected-error {{__float128 is not supported o #endif #endif -#ifdef __ppc__ +#ifdef __powerpc__ __ibm128 i; template <> struct __is_floating_point_helper<__ibm128> {}; int w(int x, __ibm128 *y) { diff --git a/clang/test/Sema/aarch64-fp16-target.c b/clang/test/Sema/aarch64-fp16-target.c new file mode 100644 index 0000000000000..9a921e96e88e5 --- /dev/null +++ b/clang/test/Sema/aarch64-fp16-target.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -fsyntax-only -verify -emit-llvm -o - %s +// REQUIRES: aarch64-registered-target + +// Test that functions with the correct target attributes can use the correct FP16 intrinsics. + +#include + +__attribute__((target("fullfp16"))) +void test_fullfp16(float16_t f16) { + vabdh_f16(f16, f16); +} + +__attribute__((target("arch=armv8-a+fp16"))) +void test_fp16_arch(float16_t f16) { + vabdh_f16(f16, f16); +} + +__attribute__((target("+fp16"))) +void test_fp16(float16_t f16) { + vabdh_f16(f16, f16); +} + +void undefined(float16_t f16) { + vabdh_f16(f16, f16); // expected-error {{'__builtin_neon_vabdh_f16' needs target feature fullfp16}} +} diff --git a/clang/test/Sema/attr-mode.c b/clang/test/Sema/attr-mode.c index 71d82a20f66d0..5e99c4583155a 100644 --- a/clang/test/Sema/attr-mode.c +++ b/clang/test/Sema/attr-mode.c @@ -46,7 +46,7 @@ typedef _Complex double c32 __attribute((mode(SC))); int c32_test[sizeof(c32) == 8 ? 
1 : -1]; typedef _Complex float c64 __attribute((mode(DC))); -#if !defined(__ppc__) && !defined(__mips__) // Note, 'XC' mode is illegal for PPC64 and MIPS machines. +#if !defined(__powerpc__) && !defined(__mips__) // Note, 'XC' mode is illegal for PPC64 and MIPS machines. typedef _Complex float c80 __attribute((mode(XC))); #endif diff --git a/clang/test/Sema/gnu-builtins.c b/clang/test/Sema/gnu-builtins.c new file mode 100644 index 0000000000000..c4da8b39363cd --- /dev/null +++ b/clang/test/Sema/gnu-builtins.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=gnu -std=gnu17 %s +// RUN: %clang_cc1 -fsyntax-only -verify=gnu -std=gnu2x %s +// RUN: %clang_cc1 -fsyntax-only -verify=std -std=c17 %s +// RUN: %clang_cc1 -fsyntax-only -verify=std -std=c2x %s + +// std-no-diagnostics + +// 'index' is a builtin library function, but only in GNU mode. So this should +// give an error in GNU modes but be okay in non-GNU mode. +// FIXME: the error is correct, but these notes are pretty awful. 
+int index; // gnu-error {{redefinition of 'index' as different kind of symbol}} \ + gnu-note {{unguarded header; consider using #ifdef guards or #pragma once}} \ + gnu-note {{previous definition is here}} diff --git a/clang/test/Sema/incompatible-function-pointer-types-strict.c b/clang/test/Sema/incompatible-function-pointer-types-strict.c new file mode 100644 index 0000000000000..647251de42030 --- /dev/null +++ b/clang/test/Sema/incompatible-function-pointer-types-strict.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -fsyntax-only %s -Wincompatible-function-pointer-types-strict -verify=soft,strict +// RUN: %clang_cc1 -fsyntax-only %s -Werror=incompatible-function-pointer-types-strict -verify=hard,strict +// RUN: %clang_cc1 -fsyntax-only %s -Wincompatible-function-pointer-types -verify=nonstrict +// nonstrict-no-diagnostics + +enum E { A = -1, B }; +typedef enum E (*fn_a_t)(void); +typedef void (*fn_b_t)(void); + +int a(void) { return 0; } +void __attribute__((noreturn)) b(void) { while (1); } + +void fa(fn_a_t x) {} // strict-note {{passing argument to parameter 'x' here}} +void fb(fn_b_t x) {} + +void baz(void) { + fa(&a); // soft-warning {{incompatible function pointer types passing 'int (*)(void)' to parameter of type 'fn_a_t' (aka 'enum E (*)(void)')}} \ + hard-error {{incompatible function pointer types passing 'int (*)(void)' to parameter of type 'fn_a_t' (aka 'enum E (*)(void)')}} + fb(&b); // no-warning +} diff --git a/clang/test/Sema/libbuiltins-ctype-powerpc64.c b/clang/test/Sema/libbuiltins-ctype-powerpc64.c index 6a5e6031c5ee2..fce9f5c0716e9 100644 --- a/clang/test/Sema/libbuiltins-ctype-powerpc64.c +++ b/clang/test/Sema/libbuiltins-ctype-powerpc64.c @@ -61,5 +61,5 @@ void test(int x) { // CHECK: declare signext i32 @tolower(i32 noundef signext) [[NUW_RO:#[0-9]+]] // CHECK: declare signext i32 @toupper(i32 noundef signext) [[NUW_RO:#[0-9]+]] -// CHECK: attributes [[NUW_RO]] = { nounwind readonly{{.*}} } -// CHECK: attributes [[NUW_RO_CALL]] = { nounwind 
readonly willreturn } +// CHECK: attributes [[NUW_RO]] = { nounwind willreturn memory(read){{.*}} } +// CHECK: attributes [[NUW_RO_CALL]] = { nounwind willreturn memory(read) } diff --git a/clang/test/Sema/libbuiltins-ctype-x86_64.c b/clang/test/Sema/libbuiltins-ctype-x86_64.c index ed6e31e734343..0d182d7095aac 100644 --- a/clang/test/Sema/libbuiltins-ctype-x86_64.c +++ b/clang/test/Sema/libbuiltins-ctype-x86_64.c @@ -61,5 +61,5 @@ void test(int x) { // CHECK: declare i32 @tolower(i32 noundef) [[NUW_RO:#[0-9]+]] // CHECK: declare i32 @toupper(i32 noundef) [[NUW_RO:#[0-9]+]] -// CHECK: attributes [[NUW_RO]] = { nounwind readonly{{.*}} } -// CHECK: attributes [[NUW_RO_CALL]] = { nounwind readonly willreturn } +// CHECK: attributes [[NUW_RO]] = { nounwind willreturn memory(read){{.*}} } +// CHECK: attributes [[NUW_RO_CALL]] = { nounwind willreturn memory(read) } diff --git a/clang/test/SemaCXX/pre-dr692.cpp b/clang/test/SemaCXX/pre-dr692.cpp deleted file mode 100644 index 87eac318dc067..0000000000000 --- a/clang/test/SemaCXX/pre-dr692.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %clang_cc1 %s -std=c++11 -verify -fexceptions -fcxx-exceptions -pedantic-errors -fno-spell-checking -fclang-abi-compat=15 - -template struct A1 {}; -template struct A2 {}; -template void e1(A1); // expected-note {{candidate}} -template void e1(A1); // expected-note {{candidate}} -template void e2(A2); // expected-note {{candidate}} -template void e2(A2); // expected-note {{candidate}} -void h() { - A1 b1; - e1(b1); // expected-error{{call to 'e1' is ambiguous}} - A2 b2; - e2(b2); // expected-error{{call to 'e2' is ambiguous}} -} diff --git a/clang/test/SemaCXX/using-decl-templates.cpp b/clang/test/SemaCXX/using-decl-templates.cpp index 73d9bc3e774cb..77dc596fdfc9f 100644 --- a/clang/test/SemaCXX/using-decl-templates.cpp +++ b/clang/test/SemaCXX/using-decl-templates.cpp @@ -102,6 +102,28 @@ struct Derived : Base { // expected-note {{requested here}} }; } // namespace DontDiagnoseInvalidTest 
+namespace shadow_nested_operator { +template +struct A { + struct Nested {}; + operator Nested*() {return 0;}; +}; + +template +struct B : A { + using A::operator typename A::Nested*; + operator typename A::Nested *() { + struct A * thi = this; + return *thi; + }; +}; + +int foo () { + struct B b; + auto s = *b; +} +} // namespace shadow_nested_operator + namespace func_templ { namespace sss { double foo(int, double); diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 40825ac831a50..6a12b64f7d7dd 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1219,7 +1219,7 @@ Optional searchLibraryBaseName(StringRef Name, StringRef Root, ArrayRef SearchPaths) { for (StringRef Dir : SearchPaths) { if (Optional File = findFile(Dir, Root, "lib" + Name + ".so")) - return None; + return File; if (Optional File = findFile(Dir, Root, "lib" + Name + ".a")) return File; } @@ -1259,6 +1259,10 @@ Expected> getDeviceInput(const ArgList &Args) { if (std::error_code EC = BufferOrErr.getError()) return createFileError(Filename, EC); + if (identify_magic((*BufferOrErr)->getBuffer()) == + file_magic::elf_shared_object) + continue; + bool IsLazy = identify_magic((*BufferOrErr)->getBuffer()) == file_magic::archive; if (Error Err = extractOffloadBinaries( @@ -1266,7 +1270,7 @@ Expected> getDeviceInput(const ArgList &Args) { return std::move(Err); } - // Try to extract input from input libraries. + // Try to extract input from input archive libraries. 
for (const opt::Arg *Arg : Args.filtered(OPT_library)) { if (auto Library = searchLibrary(Arg->getValue(), Root, LibraryPaths)) { ErrorOr> BufferOrErr = @@ -1274,8 +1278,15 @@ Expected> getDeviceInput(const ArgList &Args) { if (std::error_code EC = BufferOrErr.getError()) reportError(createFileError(*Library, EC)); + if (identify_magic((*BufferOrErr)->getBuffer()) != file_magic::archive) + continue; + if (Error Err = extractOffloadBinaries(**BufferOrErr, LazyInputFiles)) return std::move(Err); + } else { + reportError(createStringError(inconvertibleErrorCode(), + "unable to find library -l%s", + Arg->getValue())); } } diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index 2cc3b48609cb3..4b1a246d99430 100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -378,7 +378,7 @@ int clang_main(int Argc, char **Argv) { llvm::cl::ExpansionContext ECtx(A, Tokenizer); ECtx.setMarkEOLs(MarkEOLs); if (llvm::Error Err = ECtx.expandResponseFiles(Args)) { - llvm::errs() << Err << '\n'; + llvm::errs() << toString(std::move(Err)) << '\n'; return 1; } diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 0cdfec8ef9dac..960ff2b4dc480 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2169,6 +2169,7 @@ class EnqueueVisitor : public ConstStmtVisitor { void VisitOMPTaskyieldDirective(const OMPTaskyieldDirective *D); void VisitOMPBarrierDirective(const OMPBarrierDirective *D); void VisitOMPTaskwaitDirective(const OMPTaskwaitDirective *D); + void VisitOMPErrorDirective(const OMPErrorDirective *D); void VisitOMPTaskgroupDirective(const OMPTaskgroupDirective *D); void VisitOMPCancellationPointDirective(const OMPCancellationPointDirective *D); @@ -3119,6 +3120,10 @@ void EnqueueVisitor::VisitOMPTaskwaitDirective(const OMPTaskwaitDirective *D) { VisitOMPExecutableDirective(D); } +void EnqueueVisitor::VisitOMPErrorDirective(const OMPErrorDirective *D) { + 
VisitOMPExecutableDirective(D); +} + void EnqueueVisitor::VisitOMPTaskgroupDirective( const OMPTaskgroupDirective *D) { VisitOMPExecutableDirective(D); @@ -5824,6 +5829,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OMPBarrierDirective"); case CXCursor_OMPTaskwaitDirective: return cxstring::createRef("OMPTaskwaitDirective"); + case CXCursor_OMPErrorDirective: + return cxstring::createRef("OMPErrorDirective"); case CXCursor_OMPTaskgroupDirective: return cxstring::createRef("OMPTaskgroupDirective"); case CXCursor_OMPFlushDirective: diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 0013933303be8..16183432dba4f 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -717,6 +717,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OMPTaskwaitDirectiveClass: K = CXCursor_OMPTaskwaitDirective; break; + case Stmt::OMPErrorDirectiveClass: + K = CXCursor_OMPErrorDirective; + break; case Stmt::OMPTaskgroupDirectiveClass: K = CXCursor_OMPTaskgroupDirective; break; diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index e36f207389e4c..8e0e27efae9e8 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -351,24 +351,27 @@ class SpecialBoolAnalysis final } } - bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override { + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) override { const auto *Decl = Type->getAsCXXRecordDecl(); if (Decl == nullptr || Decl->getIdentifier() == nullptr || Decl->getName() != "SpecialBool") - return false; + return 
ComparisonResult::Unknown; auto *IsSet1 = cast_or_null(Val1.getProperty("is_set")); + auto *IsSet2 = cast_or_null(Val2.getProperty("is_set")); if (IsSet1 == nullptr) - return true; + return IsSet2 == nullptr ? ComparisonResult::Same + : ComparisonResult::Different; - auto *IsSet2 = cast_or_null(Val2.getProperty("is_set")); if (IsSet2 == nullptr) - return false; + return ComparisonResult::Different; return Env1.flowConditionImplies(*IsSet1) == - Env2.flowConditionImplies(*IsSet2); + Env2.flowConditionImplies(*IsSet2) + ? ComparisonResult::Same + : ComparisonResult::Different; } // Always returns `true` to accept the `MergedVal`. @@ -509,18 +512,19 @@ class OptionalIntAnalysis final } } - bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override { + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) override { // Nothing to say about a value that does not model an `OptionalInt`. if (!Type->isRecordType() || Type->getAsCXXRecordDecl()->getQualifiedNameAsString() != "OptionalInt") - return false; + return ComparisonResult::Unknown; auto *Prop1 = Val1.getProperty("has_value"); auto *Prop2 = Val2.getProperty("has_value"); assert(Prop1 != nullptr && Prop2 != nullptr); - return areEquivalentValues(*Prop1, *Prop2); + return areEquivalentValues(*Prop1, *Prop2) ? 
ComparisonResult::Same + : ComparisonResult::Different; } bool merge(QualType Type, const Value &Val1, const Environment &Env1, @@ -1182,12 +1186,12 @@ class TopAnalysis final : public DataflowAnalysis { } } - bool compareEquivalent(QualType Type, const Value &Val1, - const Environment &Env1, const Value &Val2, - const Environment &Env2) override { + ComparisonResult compare(QualType Type, const Value &Val1, + const Environment &Env1, const Value &Val2, + const Environment &Env2) override { // Changes to a sound approximation, which allows us to test whether we can // (soundly) converge for some loops. - return false; + return ComparisonResult::Unknown; } }; diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp index b143cd6329455..b45bab06d64b8 100644 --- a/clang/unittests/Driver/ToolChainTest.cpp +++ b/clang/unittests/Driver/ToolChainTest.cpp @@ -596,9 +596,10 @@ TEST(ToolChainTest, ConfigInexistentInclude) { ASSERT_TRUE(C); ASSERT_TRUE(C->containsError()); EXPECT_EQ(1U, DiagConsumer->Errors.size()); - EXPECT_STREQ("cannot read configuration file '" USERCONFIG - "': cannot not open file '" UNEXISTENT "'", - DiagConsumer->Errors[0].c_str()); + EXPECT_STRCASEEQ("cannot read configuration file '" USERCONFIG + "': cannot not open file '" UNEXISTENT + "': no such file or directory", + DiagConsumer->Errors[0].c_str()); } #undef USERCONFIG diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 18e79125d3894..acf172ea98d9d 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -6127,6 +6127,33 @@ TEST_F(FormatTest, LayoutStatementsAroundPreprocessorDirectives) { "#endif\n" " x;\n" "}"); + + verifyFormat("#if 0\n" + "#endif\n" + "#if X\n" + "int something_fairly_long; // Align here please\n" + "#endif // Should be aligned"); + + verifyFormat("#if 0\n" + "#endif\n" + "#if X\n" + "#else // Align\n" + ";\n" + "#endif // Align"); + + 
verifyFormat("void SomeFunction(int param1,\n" + " template <\n" + "#ifdef A\n" + "#if 0\n" + "#endif\n" + " MyType>\n" + "#else\n" + " Type1, Type2>\n" + "#endif\n" + " param2,\n" + " param3) {\n" + " f();\n" + "}"); } TEST_F(FormatTest, GraciouslyHandleIncorrectPreprocessorConditions) { @@ -25522,6 +25549,54 @@ TEST_F(FormatTest, ShortTemplatedArgumentLists) { verifyFormat("template struct Foo {};", Style); } +TEST_F(FormatTest, MultilineLambdaInConditional) { + auto Style = getLLVMStyleWithColumns(70); + verifyFormat("auto aLengthyIdentifier = oneExpressionSoThatWeBreak ? []() {\n" + " ;\n" + " return 5;\n" + "}()\n" + " : 2;", + Style); + verifyFormat( + "auto aLengthyIdentifier = oneExpressionSoThatWeBreak ? 2 : []() {\n" + " ;\n" + " return 5;\n" + "}();", + Style); + + Style = getLLVMStyleWithColumns(60); + verifyFormat("auto aLengthyIdentifier = oneExpressionSoThatWeBreak\n" + " ? []() {\n" + " ;\n" + " return 5;\n" + " }()\n" + " : 2;", + Style); + verifyFormat("auto aLengthyIdentifier =\n" + " oneExpressionSoThatWeBreak ? 2 : []() {\n" + " ;\n" + " return 5;\n" + " }();", + Style); + + Style = getLLVMStyleWithColumns(40); + verifyFormat("auto aLengthyIdentifier =\n" + " oneExpressionSoThatWeBreak ? []() {\n" + " ;\n" + " return 5;\n" + " }()\n" + " : 2;", + Style); + verifyFormat("auto aLengthyIdentifier =\n" + " oneExpressionSoThatWeBreak\n" + " ? 
2\n" + " : []() {\n" + " ;\n" + " return 5;\n" + " };", + Style); +} + TEST_F(FormatTest, AlignAfterOpenBracketBlockIndent) { auto Style = getLLVMStyle(); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index b4e27d35bc369..65ecb12c46cd7 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -145,6 +145,18 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) { EXPECT_TOKEN(Tokens[6], tok::l_paren, TT_FunctionTypeLParen); EXPECT_TOKEN(Tokens[7], tok::star, TT_UnaryOperator); EXPECT_TOKEN(Tokens[12], tok::star, TT_PointerOrReference); + + Tokens = annotate("if (Foo * Bar / Test)"); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::star, TT_BinaryOperator); + + Tokens = annotate("if (Class* obj {getObj()})"); + ASSERT_EQ(Tokens.size(), 12u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::star, TT_PointerOrReference); + + Tokens = annotate("if (Foo* Bar = getObj())"); + ASSERT_EQ(Tokens.size(), 11u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::star, TT_PointerOrReference); } TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) { @@ -1124,6 +1136,35 @@ TEST_F(TokenAnnotatorTest, UnderstandsVerilogOperators) { EXPECT_TOKEN(Tokens[9], tok::colon, TT_GotoLabelColon); } +TEST_F(TokenAnnotatorTest, UnderstandConstructors) { + auto Tokens = annotate("Class::Class() : BaseClass(), Member() {}"); + + // The TT_Unknown is clearly not binding for the future, please adapt if those + // tokens get annotated. 
+ ASSERT_EQ(Tokens.size(), 16u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::colon, TT_CtorInitializerColon); + EXPECT_TOKEN(Tokens[6], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[7], tok::l_paren, TT_Unknown); + EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown); + EXPECT_TOKEN(Tokens[9], tok::comma, TT_CtorInitializerComma); + EXPECT_TOKEN(Tokens[10], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[11], tok::l_paren, TT_Unknown); + EXPECT_TOKEN(Tokens[12], tok::r_paren, TT_Unknown); + EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace); + + Tokens = annotate("Class::Class() : BaseClass{}, Member{} {}"); + ASSERT_EQ(Tokens.size(), 16u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::colon, TT_CtorInitializerColon); + EXPECT_TOKEN(Tokens[6], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[7], tok::l_brace, TT_Unknown); + EXPECT_TOKEN(Tokens[8], tok::r_brace, TT_Unknown); + EXPECT_TOKEN(Tokens[9], tok::comma, TT_CtorInitializerComma); + EXPECT_TOKEN(Tokens[10], tok::identifier, TT_Unknown); + EXPECT_TOKEN(Tokens[11], tok::l_brace, TT_Unknown); + EXPECT_TOKEN(Tokens[12], tok::r_brace, TT_Unknown); + EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace); +} + } // namespace } // namespace format } // namespace clang diff --git a/clang/unittests/Lex/HeaderMapTest.cpp b/clang/unittests/Lex/HeaderMapTest.cpp index 4220edb2908e0..5484041844ea7 100644 --- a/clang/unittests/Lex/HeaderMapTest.cpp +++ b/clang/unittests/Lex/HeaderMapTest.cpp @@ -115,8 +115,7 @@ template struct PaddedFile { TEST(HeaderMapTest, lookupFilenameTruncatedSuffix) { typedef HMapFileMock<2, 64 - sizeof(HMapHeader) - 2 * sizeof(HMapBucket)> FileTy; - static_assert(std::is_standard_layout::value, - "Expected standard layout"); + static_assert(std::is_standard_layout_v, "Expected standard layout"); static_assert(sizeof(FileTy) == 64, "check the math"); PaddedFile P; auto &File = P.File; @@ -151,8 +150,7 @@ TEST(HeaderMapTest, lookupFilenameTruncatedSuffix) { TEST(HeaderMapTest, 
lookupFilenameTruncatedPrefix) { typedef HMapFileMock<2, 64 - sizeof(HMapHeader) - 2 * sizeof(HMapBucket)> FileTy; - static_assert(std::is_standard_layout::value, - "Expected standard layout"); + static_assert(std::is_standard_layout_v, "Expected standard layout"); static_assert(sizeof(FileTy) == 64, "check the math"); PaddedFile P; auto &File = P.File; diff --git a/clang/unittests/Tooling/ASTSelectionTest.cpp b/clang/unittests/Tooling/ASTSelectionTest.cpp index 88988ef447875..531f9ac89f441 100644 --- a/clang/unittests/Tooling/ASTSelectionTest.cpp +++ b/clang/unittests/Tooling/ASTSelectionTest.cpp @@ -101,22 +101,22 @@ void checkDeclName(const SelectedASTNode &Node, StringRef Name) { } template -const SelectedASTNode &checkNode( - const SelectedASTNode &StmtNode, SourceSelectionKind SelectionKind, - unsigned NumChildren = 0, - std::enable_if_t::value, T> *StmtOverloadChecker = - nullptr) { +const SelectedASTNode & +checkNode(const SelectedASTNode &StmtNode, SourceSelectionKind SelectionKind, + unsigned NumChildren = 0, + std::enable_if_t, T> *StmtOverloadChecker = + nullptr) { checkNodeImpl(isa(StmtNode.Node.get()), StmtNode, SelectionKind, NumChildren); return StmtNode; } template -const SelectedASTNode &checkNode( - const SelectedASTNode &DeclNode, SourceSelectionKind SelectionKind, - unsigned NumChildren = 0, StringRef Name = "", - std::enable_if_t::value, T> *DeclOverloadChecker = - nullptr) { +const SelectedASTNode & +checkNode(const SelectedASTNode &DeclNode, SourceSelectionKind SelectionKind, + unsigned NumChildren = 0, StringRef Name = "", + std::enable_if_t, T> *DeclOverloadChecker = + nullptr) { checkNodeImpl(isa(DeclNode.Node.get()), DeclNode, SelectionKind, NumChildren); if (!Name.empty()) diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index 712d2bd40fbbc..44cf42fa944a2 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -151,9 +151,8 @@ 
TEST_F(TreeTest, Iterators) { // FIXME: mutate and observe no invalidation. Mutations are private for now... auto It = Range.begin(); auto CIt = ConstRange.begin(); - static_assert(std::is_same::value, - "mutable range"); - static_assert(std::is_same::value, + static_assert(std::is_same_v, "mutable range"); + static_assert(std::is_same_v, "const range"); for (unsigned I = 0; I < 3; ++I) { diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 24beb7fc88b65..2affb6cec8e4f 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -13956,7 +13956,7 @@

C++ defect report implementation status

2358 CD5 Explicit capture of value - Unknown + Clang 16 2359 diff --git a/cmake/Modules/FindLibEdit.cmake b/cmake/Modules/FindLibEdit.cmake index 7e62d4d839ae1..de8f5a2e71013 100644 --- a/cmake/Modules/FindLibEdit.cmake +++ b/cmake/Modules/FindLibEdit.cmake @@ -21,6 +21,7 @@ find_library(LibEdit_LIBRARIES NAMES edit HINTS ${PC_LIBEDIT_LIBRARY_DIRS}) include(CheckIncludeFile) if(LibEdit_INCLUDE_DIRS AND EXISTS "${LibEdit_INCLUDE_DIRS}/histedit.h") + include(CMakePushCheckState) cmake_push_check_state() list(APPEND CMAKE_REQUIRED_INCLUDES ${LibEdit_INCLUDE_DIRS}) list(APPEND CMAKE_REQUIRED_LIBRARIES ${LibEdit_LIBRARIES}) diff --git a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake index e2506872751f9..e372da0d99ba0 100644 --- a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake @@ -116,7 +116,7 @@ function(darwin_test_archs os valid_archs) if(NOT TEST_COMPILE_ONLY) message(STATUS "Finding valid architectures for ${os}...") set(SIMPLE_C ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/src.c) - file(WRITE ${SIMPLE_C} "#include \nint main() { printf(__FILE__); return 0; }\n") + file(WRITE ${SIMPLE_C} "#include \nint main(void) { printf(__FILE__); return 0; }\n") set(os_linker_flags) foreach(flag ${DARWIN_${os}_LINK_FLAGS}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index da86bdcdcf169..f6190ee60e3c3 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -224,7 +224,7 @@ set(COMPILER_RT_SUPPORTED_ARCH) # runtime libraries supported by our current compilers cross-compiling # abilities. 
set(SIMPLE_SOURCE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/simple.cc) -file(WRITE ${SIMPLE_SOURCE} "#include \n#include \nint main() { printf(\"hello, world\"); }\n") +file(WRITE ${SIMPLE_SOURCE} "#include \n#include \nint main(void) { printf(\"hello, world\"); }\n") # Detect whether the current target platform is 32-bit or 64-bit, and setup # the correct commandline flags needed to attempt to target 32-bit and 64-bit. diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h index 27d826fb613ae..0b4dbf03bb9d5 100644 --- a/compiler-rt/lib/asan/asan_allocator.h +++ b/compiler-rt/lib/asan/asan_allocator.h @@ -135,12 +135,6 @@ typedef VeryCompactSizeClassMap SizeClassMap; const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x2000000000ULL; // 128G. typedef VeryDenseSizeClassMap SizeClassMap; -# elif defined(__aarch64__) -// AArch64/SANITIZER_CAN_USE_ALLOCATOR64 is only for 42-bit VMA -// so no need to different values for different VMA. -const uptr kAllocatorSpace = 0x10000000000ULL; -const uptr kAllocatorSize = 0x10000000000ULL; // 3T. -typedef DefaultSizeClassMap SizeClassMap; #elif defined(__sparc__) const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x20000000000ULL; // 2T. 
diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h index 35727a96497dc..c4bf087ea17f0 100644 --- a/compiler-rt/lib/asan/asan_interceptors.h +++ b/compiler-rt/lib/asan/asan_interceptors.h @@ -114,7 +114,7 @@ void InitializePlatformInterceptors(); #if SANITIZER_LINUX && \ (defined(__arm__) || defined(__aarch64__) || defined(__i386__) || \ - defined(__x86_64__) || SANITIZER_RISCV64) + defined(__x86_64__) || SANITIZER_RISCV64 || SANITIZER_LOONGARCH64) # define ASAN_INTERCEPT_VFORK 1 #else # define ASAN_INTERCEPT_VFORK 0 diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index fd3d3956439d2..42015ef8f36d6 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -755,7 +755,7 @@ else () SOURCE "#if !(__ARM_FP & 0x8) #error No double-precision support! #endif - int main() { return 0; }") + int main(void) { return 0; }") if(NOT COMPILER_RT_HAS_${arch}_VFP_DP) list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_DP_SOURCES}) endif() diff --git a/compiler-rt/lib/builtins/apple_versioning.c b/compiler-rt/lib/builtins/apple_versioning.c index f87b42820c154..83d419418f241 100644 --- a/compiler-rt/lib/builtins/apple_versioning.c +++ b/compiler-rt/lib/builtins/apple_versioning.c @@ -138,13 +138,13 @@ NOT_HERE_BEFORE_10_6(__udivti3) NOT_HERE_BEFORE_10_6(__umoddi3) NOT_HERE_BEFORE_10_6(__umodti3) -#if __ppc__ +#if __powerpc__ NOT_HERE_BEFORE_10_6(__gcc_qadd) NOT_HERE_BEFORE_10_6(__gcc_qdiv) NOT_HERE_BEFORE_10_6(__gcc_qmul) NOT_HERE_BEFORE_10_6(__gcc_qsub) NOT_HERE_BEFORE_10_6(__trampoline_setup) -#endif // __ppc__ +#endif // __powerpc__ NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange) NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1) diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index bcc5922e073b3..8993761eb3d42 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ 
b/compiler-rt/lib/builtins/clear_cache.c @@ -93,12 +93,29 @@ void __clear_cache(void *start, void *end) { #endif #elif defined(__linux__) && defined(__loongarch__) __asm__ volatile("ibar 0"); -#elif defined(__linux__) && defined(__mips__) +#elif defined(__mips__) const uintptr_t start_int = (uintptr_t)start; const uintptr_t end_int = (uintptr_t)end; - syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); -#elif defined(__mips__) && defined(__OpenBSD__) - cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE); + uintptr_t synci_step; + __asm__ volatile("rdhwr %0, $1" : "=r"(synci_step)); + if (synci_step != 0) { +#if __mips_isa_rev >= 6 + for (uintptr_t p = start_int; p < end_int; p += synci_step) + __asm__ volatile("synci 0(%0)" : : "r"(p)); + + // The last "move $at, $0" is the target of jr.hb instead of delay slot. + __asm__ volatile(".set noat\n" + "sync\n" + "addiupc $at, 12\n" + "jr.hb $at\n" + "move $at, $0\n" + ".set at"); +#else + // Pre-R6 may not be globalized. And some implementations may give strange + // synci_step. So, let's use libc call for it. 
+ cacheflush(start, end_int - start_int, BCACHE); +#endif + } #elif defined(__aarch64__) && !defined(__APPLE__) uint64_t xstart = (uint64_t)(uintptr_t)start; uint64_t xend = (uint64_t)(uintptr_t)end; diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h index 7a72de4806764..e94d3154c6d4e 100644 --- a/compiler-rt/lib/builtins/int_types.h +++ b/compiler-rt/lib/builtins/int_types.h @@ -64,7 +64,7 @@ typedef union { } udwords; #if defined(__LP64__) || defined(__wasm__) || defined(__mips64) || \ - defined(__riscv) || defined(_WIN64) + defined(__SIZEOF_INT128__) || defined(_WIN64) #define CRT_HAS_128BIT #endif diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c index a62431723d787..844eb27944142 100644 --- a/compiler-rt/lib/builtins/trampoline_setup.c +++ b/compiler-rt/lib/builtins/trampoline_setup.c @@ -16,7 +16,7 @@ extern void __clear_cache(void *start, void *end); // which loads r11 with a pointer to the outer function's locals // and then jumps to the target nested function. 
-#if __ppc__ && !defined(__powerpc64__) +#if __powerpc__ && !defined(__powerpc64__) COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, int trampSizeAllocated, const void *realFunc, void *localsPtr) { @@ -40,4 +40,4 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, // clear instruction cache __clear_cache(trampOnStack, &trampOnStack[10]); } -#endif // __ppc__ && !defined(__powerpc64__) +#endif // __powerpc__ && !defined(__powerpc64__) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S index 05192485d5971..68782acb379d1 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_vfork_loongarch64.inc.S @@ -5,12 +5,6 @@ ASM_HIDDEN(COMMON_INTERCEPTOR_SPILL_AREA) ASM_HIDDEN(_ZN14__interception10real_vforkE) -.bss -.type _ZN14__interception10real_vforkE, @object -.size _ZN14__interception10real_vforkE, 8 -_ZN14__interception10real_vforkE: - .zero 8 - .text .globl ASM_WRAPPER_NAME(vfork) ASM_TYPE_FUNCTION(ASM_WRAPPER_NAME(vfork)) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h index 32005eef08cd5..7ecc465bea97a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform.h @@ -286,8 +286,8 @@ #ifndef SANITIZER_CAN_USE_ALLOCATOR64 # if (SANITIZER_ANDROID && defined(__aarch64__)) || SANITIZER_FUCHSIA # define SANITIZER_CAN_USE_ALLOCATOR64 1 -# elif defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ - defined(__arm__) || SANITIZER_RISCV64 || defined(__hexagon__) +# elif defined(__mips64) || defined(__arm__) || defined(__i386__) || \ + SANITIZER_RISCV64 || defined(__hexagon__) # define SANITIZER_CAN_USE_ALLOCATOR64 0 # else # define 
SANITIZER_CAN_USE_ALLOCATOR64 (SANITIZER_WORDSIZE == 64) diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt index 0a12cb7021f69..84747a552e79f 100644 --- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt +++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt @@ -6,6 +6,8 @@ append_list_if(SANITIZER_LIMIT_FRAME_SIZE -Wframe-larger-than=530 TSAN_RTL_CFLAGS) append_list_if(COMPILER_RT_HAS_WGLOBAL_CONSTRUCTORS_FLAG -Wglobal-constructors TSAN_RTL_CFLAGS) +append_list_if(COMPILER_RT_INTERCEPT_LIBDISPATCH ${COMPILER_RT_LIBDISPATCH_CFLAGS} + TSAN_RTL_CFLAGS) set(TSAN_RTL_DYNAMIC_CFLAGS ${TSAN_RTL_CFLAGS}) list(REMOVE_ITEM TSAN_RTL_DYNAMIC_CFLAGS -fPIE) @@ -75,7 +77,6 @@ if(COMPILER_RT_INTERCEPT_LIBDISPATCH) list(APPEND TSAN_SOURCES tsan_interceptors_libdispatch.cpp ) - list(APPEND TSAN_RTL_CFLAGS ${COMPILER_RT_LIBDISPATCH_CFLAGS}) endif() set(TSAN_HEADERS diff --git a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp index 1c8ea99430df2..a86e032ecef6f 100644 --- a/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/odr-violation.cpp @@ -6,16 +6,19 @@ // We use fast_unwind_on_malloc=0 to have full unwinding even w/o frame // pointers. This setting is not on by default because it's too expensive. // +// Note, -asan-use-private-alias=1 -asan-use-odr-indicator=1 is the default. +// -fno-sanitize-address-use-odr-indicator turns off both. 
+// // Different size: detect a bug if detect_odr_violation>=1 -// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib -// RUN: %clangxx_asan -g %s %ld_flags_rpath_exe -o %t-ODR-EXE +// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -fno-sanitize-address-use-odr-indicator %s -o %dynamiclib +// RUN: %clangxx_asan -g -fno-sanitize-address-use-odr-indicator %s %ld_flags_rpath_exe -o %t-ODR-EXE // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=1 not %run %t-ODR-EXE 2>&1 | FileCheck %s // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=2 not %run %t-ODR-EXE 2>&1 | FileCheck %s // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=0 %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED // RUN: %env_asan_opts=fast_unwind_on_malloc=0 not %run %t-ODR-EXE 2>&1 | FileCheck %s // // Same size: report a bug only if detect_odr_violation>=2. -// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib -DSZ=100 +// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -fno-sanitize-address-use-odr-indicator %s -o %dynamiclib -DSZ=100 // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=1 %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED // RUN: %env_asan_opts=fast_unwind_on_malloc=0:detect_odr_violation=2 not %run %t-ODR-EXE 2>&1 | FileCheck %s // RUN: %env_asan_opts=fast_unwind_on_malloc=0 not %run %t-ODR-EXE 2>&1 | FileCheck %s @@ -26,13 +29,13 @@ // RUN: rm -f %t.supp // // Use private aliases for global variables without indicator symbol. 
-// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias %s -o %dynamiclib -DSZ=100 -// RUN: %clangxx_asan -g -mllvm -asan-use-private-alias %s %ld_flags_rpath_exe -o %t-ODR-EXE +// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-odr-indicator=0 %s -o %dynamiclib -DSZ=100 +// RUN: %clangxx_asan -g -mllvm -asan-use-odr-indicator=0 %s %ld_flags_rpath_exe -o %t-ODR-EXE // RUN: %env_asan_opts=fast_unwind_on_malloc=0 %run %t-ODR-EXE 2>&1 | FileCheck %s --check-prefix=DISABLED // Use private aliases for global variables: use indicator symbol to detect ODR violation. -// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared -mllvm -asan-use-private-alias -mllvm -asan-use-odr-indicator %s -o %dynamiclib -DSZ=100 -// RUN: %clangxx_asan -g -mllvm -asan-use-private-alias -mllvm -asan-use-odr-indicator %s %ld_flags_rpath_exe -o %t-ODR-EXE +// RUN: %clangxx_asan -g -DBUILD_SO=1 -fPIC -shared %s -o %dynamiclib -DSZ=100 +// RUN: %clangxx_asan -g %s %ld_flags_rpath_exe -o %t-ODR-EXE // RUN: %env_asan_opts=fast_unwind_on_malloc=0 not %run %t-ODR-EXE 2>&1 | FileCheck %s // Same as above but with clang switches. 
diff --git a/compiler-rt/test/asan/TestCases/Linux/odr_indicators.cpp b/compiler-rt/test/asan/TestCases/Linux/odr_indicators.cpp index 36176b552906d..583f6e662fda8 100644 --- a/compiler-rt/test/asan/TestCases/Linux/odr_indicators.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/odr_indicators.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx_asan -fPIC %s -o %t +// RUN: %clangxx_asan -fno-sanitize-address-use-odr-indicator -fPIC %s -o %t // RUN: %env_asan_opts=report_globals=2 %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK,INDICATOR0 // RUN: %clangxx_asan -fsanitize-address-use-odr-indicator -fPIC %s -o %t diff --git a/compiler-rt/test/asan/TestCases/Linux/vfork.cpp b/compiler-rt/test/asan/TestCases/Linux/vfork.cpp index 4c0f02c5088e4..b943e4debce2c 100644 --- a/compiler-rt/test/asan/TestCases/Linux/vfork.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/vfork.cpp @@ -1,7 +1,7 @@ // https://github.com/google/sanitizers/issues/925 // RUN: %clang_asan -O0 %s -o %t && %run %t 2>&1 -// REQUIRES: aarch64-target-arch || x86_64-target-arch || i386-target-arch || arm-target-arch || riscv64-target-arch +// REQUIRES: aarch64-target-arch || x86_64-target-arch || i386-target-arch || arm-target-arch || riscv64-target-arch || loongarch64-target-arch #include #include diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index eff8560c65ba9..943c34c1ad4fe 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -170,7 +170,7 @@ if (FLANG_STANDALONE_BUILD) find_package(Threads) target_link_libraries(llvm_gtest PUBLIC Threads::Threads) add_library(llvm_gtest_main ${UNITTEST_DIR}/UnitTestMain/TestMain.cpp) - target_link_libraries(gtest_main PUBLIC llvm_gtest) + target_link_libraries(llvm_gtest_main PUBLIC llvm_gtest) endif() set(FLANG_GTEST_AVAIL 1) else() @@ -200,8 +200,8 @@ else() set(FLANG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) endif() - set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --src-root - set(MLIR_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # 
--includedir + set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir ) # --src-root + set(MLIR_INCLUDE_DIR ${MLIR_MAIN_SRC_DIR}/include ) # --includedir set(MLIR_TABLEGEN_OUTPUT_DIR ${CMAKE_BINARY_DIR}/tools/mlir/include) include_directories(SYSTEM ${MLIR_INCLUDE_DIR}) include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR}) @@ -411,6 +411,9 @@ endif() include(CMakeParseArguments) include(AddFlang) +if (FLANG_INCLUDE_TESTS) + add_compile_definitions(FLANG_INCLUDE_TESTS=1) +endif() add_subdirectory(include) add_subdirectory(lib) diff --git a/flang/docs/ComplexOperations.md b/flang/docs/ComplexOperations.md new file mode 100644 index 0000000000000..6faa1811fd6d0 --- /dev/null +++ b/flang/docs/ComplexOperations.md @@ -0,0 +1,76 @@ +# Complex Operations + +```eval_rst +.. contents:: + :local: +``` + +Fortran includes support for complex number types and a set of operators and +intrinsics that work on these types. Some of those operations are complicated +and require runtime function calls to implement. + +This document outlines a design for generating these operations using the MLIR +complex dialect while avoiding cross-platform ABI issues. + +## FIR Representation + +MLIR contains a complex dialect, similar to the Math dialect also used for +lowering some integer and floating point operations in Flang. Conversion between +fir.complex types and MLIR complex types is supported. + +As a result at the FIR level, complex operations can be represented as +conversions from the fir.complex type to the equivalent MLIR complex type, use +of the MLIR operation and a conversion back. 
+ +This is similar to the way the math intrinsics are lowered, as proposed [here][1] + +**Fortran** +```fortran +function pow_self(c) + complex, intent(in) :: c + complex :: pow_self + pow_self = c ** c +end function pow_self +``` + +**FIR** +```c +func.func @_QPpow_self(%arg0: !fir.ref>) -> !fir.complex<4> { + %0 = fir.alloca !fir.complex<4> + %1 = fir.load %arg0 : !fir.ref> + %2 = fir.load %arg0 : !fir.ref> + %3 = fir.convert %1 : (!fir.complex<4>) -> complex + %4 = fir.convert %2 : (!fir.complex<4>) -> complex + %5 = complex.pow %3, %4 : complex + %6 = fir.convert %5 : (complex) -> !fir.complex<4> + fir.store %6 to %0 : !fir.ref> + %7 = fir.load %0 : !fir.ref> + return %7 : !fir.complex<4> + } +``` + +Some operations are currently missing in the MLIR complex dialect that we would +want to use here, such as powi and the hyperbolic trigonometry functions. +For the missing operations we call directly to libm where possible, for powi +we provide an implementation in the flang runtime. + +## Lowering + +The MLIR complex dialect supports lowering either by emitting calls to the +complex functions in libm (ComplexToLibm), or through lowering to the standard +dialect (ComplexToStandard). However, as MLIR has no target awareness, the +lowering to libm functions suffers from ABI incompatibilities on some platforms. +As such the custom lowering to the standard dialect is used. This may be +something to revisit in future if performance could be improved by using the +libm functions. + +Similarly to the numerical lowering through the math dialect, certain MLIR +optimisations could violate the precise floating point model, so when that is +requested lowering manually emits calls to libm, rather than going through the +MLIR complex dialect. + +The ComplexToStandard dialect does still call into libm for some floating +point math operations, however these don't have the same ABI issues as the +complex libm functions. 
+ +[1]: https://discourse.llvm.org/t/rfc-change-lowering-of-fortran-math-intrinsics/63971 diff --git a/flang/docs/Intrinsics.md b/flang/docs/Intrinsics.md index baa0609c7ccaa..2af87f6adc84b 100644 --- a/flang/docs/Intrinsics.md +++ b/flang/docs/Intrinsics.md @@ -751,7 +751,7 @@ This phase currently supports all the intrinsic procedures listed above but the | Type inquiry intrinsic functions | BIT_SIZE, DIGITS, EPSILON, HUGE, KIND, MAXEXPONENT, MINEXPONENT, NEW_LINE, PRECISION, RADIX, RANGE, TINY| | Non-standard intrinsic functions | AND, OR, XOR, LSHIFT, RSHIFT, SHIFT, ZEXT, IZEXT, COSD, SIND, TAND, ACOSD, ASIND, ATAND, ATAN2D, COMPL, DCMPLX, EQV, NEQV, INT8, JINT, JNINT, KNINT, LOC, QCMPLX, DREAL, DFLOAT, QEXT, QFLOAT, QREAL, DNUM, NUM, JNUM, KNUM, QNUM, RNUM, RAN, RANF, ILEN, SIZEOF, MCLOCK, SECNDS, COTAN, IBCHNG, ISHA, ISHC, ISHL, IXOR, IARG, IARGC, NARGS, NUMARG, BADDRESS, IADDR, CACHESIZE, EOF, FP_CLASS, INT_PTR_KIND, ISNAN, MALLOC | | Intrinsic subroutines |MVBITS (elemental), CPU_TIME, DATE_AND_TIME, EVENT_QUERY, EXECUTE_COMMAND_LINE, GET_COMMAND, GET_COMMAND_ARGUMENT, GET_ENVIRONMENT_VARIABLE, MOVE_ALLOC, RANDOM_INIT, RANDOM_NUMBER, RANDOM_SEED, SYSTEM_CLOCK | -| Atomic intrinsic subroutines | ATOMIC_ADD &al. | +| Atomic intrinsic subroutines | ATOMIC_ADD | | Collective intrinsic subroutines | CO_REDUCE | diff --git a/flang/docs/ProcedurePointer.md b/flang/docs/ProcedurePointer.md new file mode 100644 index 0000000000000..157d387c37094 --- /dev/null +++ b/flang/docs/ProcedurePointer.md @@ -0,0 +1,486 @@ + + +# Procedure Pointer + +A procedure pointer is a procedure that has the EXTERNAL and POINTER attributes. + +This document summarizes what of context the procedure pointers should appear, +and how they are lowered to FIR. + +The current plan is to use/extend the `BoxedProcedure` pass for the conversion +to LLVM IR, and thus will not be lowering the procedure-pointer-related +operations to LLVM IR in `CodeGen.cpp`. 
+ +## Fortran standard + +Here is a list of the sections and constraints of the Fortran standard involved +for procedure pointers. + +- 8.5.4 Components + - C757 + - C758 + - C759 +- 8.5.9: EXTERNAL attribute +- 8.5.14: POINTER attribute + - C853 + - A procedure pointer shall not be referenced unless it is pointer associated + with a target procedure. +- 8.5.15 PROTECTED attribute + - C855 +- 8.5.16 SAVE attribute + - (4) A procedure pointer declared in the scoping unit of a main program, + module, or submodule implicitly has the SAVE attribute. +- 8.10.2.1 COMMON statement + - C8119 +- 10.2.2.2 Pointer assignment statement + - C1028 + - C1029 +- 10.2.2.4 Procedure pointer assignment +- 11.1.3 ASSOCIATE construct + - C1005 +- 12.6.3 Data transfer input/output list + - C1233 +- 15.2.2.4 Procedure pointers + - A procedure pointer may be pointer associated with an external procedure, an + internal procedure, an intrinsic procedure, a module procedure, or a dummy + procedure that is not a procedure pointer. +- 15.4.3.6 Procedure declaration statement +- 15.5.2.9(5) Actual arguments associated with dummy procedure entities +- 16.9.16 ASSOCIATED(POINTER [, TARGET]) + - POINTER may be a procedure pointer, and TARGET may be proc-target in a + pointer assignment statement (10.2.2). +- 16.9.144 NULL([MOLD]) + - MOLD may be a procedure pointer. +- 18.2.3.4 C_F_PROCPOINTER(CPTR, FPTR) + - FPTR shall be a procedure pointer, and not be a component of a coindexed + object. +- C.1.1 A procedure that is not a procedure pointer can be an actual argument + that corresponds to a procedure pointer dummy argument with the INTENT(IN) + attribute. + +--- + +## Representation in FIR + +### Procedure pointer `!fir.ref>` + +A procedure pointer may have an explicit or implicit interface. T in +`!fir.ref>` is the function type, which is `() -> ()` if the +procedure pointer has the implicit interface declared as +`procedure(), pointer :: p`. 
+ +A procedure declaration statement specifies the EXTERNAL attribute (8.5.9) for all +entities in the procedure declaration list. + +### Actual arguments associated with dummy procedure entities + +The actual argument may be a procedure pointer, a valid target for the dummy +pointer, a reference to the NULL() intrinsic, or a reference to a function that +returns a procedure pointer. + +If the interface is explicit, and the dummy argument is a procedure pointer, the +reference is resolved as the pointer to the procedure; otherwise, the reference +is resolved as the pointer target. + +**Fortran case 1** +```fortran +subroutine proc_pointer_dummy_argument(p) + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: p + call foo1(p) + call foo2(p) +contains + subroutine foo2(q) + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: q + end subroutine foo2 +end subroutine proc_pointer_dummy_argument +``` + +**FIR for case 1** +``` +func.func private @foo1(!fir.boxproc<(!fir.ref) -> !fir.ref>) +func.func private @foo2(!fir.ref) -> !fir.ref>>) + +func.func @proc_pointer_dummy_argument(%0 : !fir.ref) -> !fir.ref>>) { + %1 = fir.load %0 : !fir.ref) -> !fir.ref>> + fir.call @foo1(%1) : ((!fir.ref) -> !fir.ref) -> () + fir.call @foo2(%0) : (!fir.ref) -> !fir.ref>>) -> () + return +} +``` + +**Fortran case 2** +```fortran +subroutine proc_pointer_global() + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer, save :: p + call foo1(p) + call foo2(p) +contains + subroutine foo2(q) + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: q + end subroutine foo2 +end subroutine proc_pointer_global +``` + +**FIR for case 2** +``` +func.func private @foo1(!fir.boxproc<(!fir.ref) -> !fir.ref>) +func.func private @foo2(!fir.ref) -> !fir.ref>>) +
+fir.global internal @ProcedurePointer : !fir.boxproc<(!fir.ref) -> !fir.ref> { + %0 = fir.zero_bits (!fir.ref) -> !fir.ref + %1 = fir.emboxproc %0 : ((!fir.ref) -> !fir.ref) -> !fir.boxproc<(!fir.ref) -> !fir.ref> + fir.has_value %1 : !fir.boxproc<(!fir.ref) -> !fir.ref> +} + +func.func @proc_pointer_global() { + %0 = fir.address_of(@ProcedurePointer) : !fir.ref) -> !fir.ref>> + %1 = fir.load %0 : !fir.ref) -> !fir.ref>> + fir.call @foo1(%1) : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> () + fir.call @foo2(%0) : (!fir.ref) -> !fir.ref>>) -> () + return +} +``` + +**Fortran case 3** +```fortran +subroutine proc_pointer_local() + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: p + call foo1(p) + call foo2(p) +contains + subroutine foo2(q) + interface + function func(x) + integer :: x + end function func + end interface + procedure(func), pointer :: q + end subroutine foo2 +end subroutine proc_pointer_local +``` + +**FIR for case 3** +``` +func.func private @foo1(!fir.boxproc<(!fir.ref) -> !fir.ref>) +func.func private @foo2(!fir.ref) -> !fir.ref>>) + +func.func @proc_pointer_local() { + %0 = fir.alloca !fir.boxproc<(!fir.ref) -> !fir.ref> + %1 = fir.load %0 : !fir.ref) -> !fir.ref>> + %2 = fir.box_addr %1 : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> ((!fir.ref) -> !fir.ref) + %3 = fir.zero_bits (!fir.ref) -> !fir.ref + fir.store %3 to %2 : !fir.ref<(!fir.ref) -> !fir.ref> + %4 = fir.load %0 : !fir.ref) -> !fir.ref>> + fir.call @foo1(%4) : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> () + fir.call @foo2(%0) : (!fir.ref) -> !fir.ref>>) -> () + return +} +``` + +It is possible to pass procedure pointers to a C function. If the C function has +an explicit interface in fortran code, and the dummy argument is a procedure +pointer, the code passes a pointer to the procedure as the actual argument +(see Case 5); Otherwise, the code passes the procedure pointer target as the +actual argument (see Case 4). 
+ +**Case 4** +```c +void func_(void (*foo)(int *)) { + int *x, y = 1; + x = &y; + foo(x); +} +``` +```fortran +program main + procedure(), pointer :: pp + pp=>print_x + call func(pp) +contains + subroutine print_x(x) + integer :: x + print *, x + end +end +``` + +Note that using an internal procedure here is not good practice, although it works in +this implementation. It is better to use a BIND(C) external or module procedure as +the right-hand side proc-target. + +**Case 5** +```c +void func_(void (**foo)(int *)) { + int *x, y = 1; + x = &y; + (*foo)(x); +} +``` +```fortran +program main + interface + subroutine func(p) + procedure(), pointer :: p + end + end interface + procedure(), pointer :: pp + pp=>print_x + call func(pp) +contains + subroutine print_x(x) + integer :: x + print *, x + end +end +``` + +Case 4 and Case 5 are not recommended as of the Fortran 2003 standard, which provides +the feature of interoperability with C to handle this. Specifically, +C_F_PROCPOINTER is used to associate a procedure pointer with the target of a C +function pointer. C_FUNPTR is also designed for interoperability with any C +function pointer type. + +### Procedure pointer to function returning a character type + +The dummy procedure pointer may not have a function type with an assumed length +due to C721 and C723. + +### Procedure pointer to internal procedure + +Initially the current plan is to implement pointers to internal procedures +using the LLVM Trampoline intrinsics. This has the drawback of requiring the +stack to be executable, which is a security hole. To avoid this, we will need +to improve the implementation to use heap-resident thunks. + +### Procedure pointer assignment `p => proc` + +The right-hand side may be a procedure, a procedure pointer, or a function whose +result is a procedure pointer. + +The procedure could be a BIND(C) procedure. The lowering of it is the same as +that of an external or module procedure. The case of an internal procedure has been +discussed above.
+ +```c +#include <stdio.h> +void func_(int *x) { + printf("%d\n", *x); +} +``` +```fortran +program main + interface + subroutine func(x) bind(C) + integer :: x + end + end interface + procedure(func), bind(C, name="func_") :: proc + procedure(func), pointer :: pp + integer :: x = 5 + pp=>proc + call pp(x) +end +``` + +**Fortran case** +```fortran +subroutine proc_pointer_assignment(arg0, arg1) + interface + function func(x) + integer :: x + end + end interface + procedure(func), pointer :: arg0, arg1 + real, external, bind(C, name="Procedure") :: proc + arg0=>proc ! case 1 + arg0=>arg1 ! case 2 + arg0=>reffunc ! case 3 +contains + function reffunc() result(pp) + interface + function func(x) + integer :: x + end + end interface + procedure(func), pointer :: pp + end +end +function proc(x) bind(C, name="Procedure") + integer :: x + proc = real(x) +end +``` + +**FIR** +``` +func.func @Procedure(%arg0 : !fir.ref) -> !fir.ref { + %1 = fir.load %arg0 : !fir.ref + %2 = fir.convert %1 : (i32) -> f32 + return %2 : f32 +} + +func.func @Reference2Function() -> !fir.boxproc<(!fir.ref) -> !fir.ref> { + %0 = fir.alloca !fir.boxproc<(!fir.ref) -> !fir.ref> + %1 = fir.load %0 : !fir.ref) -> !fir.ref>> + return %1 : !fir.boxproc<(!fir.ref) -> !fir.ref> +} + +func.func @proc_pointer_assignment(%arg0 : !fir.ref) -> !fir.ref>>, %arg1 : !fir.ref) -> !fir.ref>>) { + %0 = fir.alloca !fir.boxproc<(!fir.ref) -> !fir.ref> {bindc_name = ".result"} + // case 1: assignment from external procedure + %1 = fir.address_of(@Procedure) : (!fir.ref) -> !fir.ref + %2 = fir.emboxproc %1 : ((!fir.ref) -> !fir.ref) -> !fir.boxproc<(!fir.ref) -> !fir.ref> + fir.store %2 to %arg0 : !fir.ref) -> !fir.ref>> + // case 2: assignment from procedure pointer + %3 = fir.load %arg1 : !fir.ref) -> !fir.ref>> + fir.store %3 to %arg0 : !fir.ref) -> !fir.ref>> + // case 3: assignment from a reference to a function whose result is a procedure pointer + %4 = fir.call @Reference2Function() : () -> !fir.boxproc<(!fir.ref) ->
!fir.ref> + fir.store %4 to %0 : !fir.ref) -> !fir.ref>> + %5 = fir.load %0 : !fir.ref) -> !fir.ref>> + fir.store %5 to %arg0 : !fir.ref) -> !fir.ref>> + return +} +``` + +### Procedure pointer components + +Having procedure pointers in derived types permits `methods` to be dynamically +bound to objects. Such procedure pointer components will have the type +!fir.boxproc. + +**Fortran** +```fortran +subroutine proc_pointer_component(a, i, f) + interface + function func(x) + integer :: x + end + end interface + type matrix + real :: element(2,2) + procedure(func), pointer, nopass :: solve + end type + integer :: i + procedure(func) :: f + type(matrix) :: a + a%solve=>f + r = a%solve(i) +end subroutine proc_pointer_component +``` + +**FIR** +``` +func.func @proc_pointer_component(%arg0 : (!fir.ref) -> !fir.ref, %arg1: !fir.ref) { + %0 = fir.alloca !fir.type<_QFtestTmatrix{element:!fir.array<2x2xf32>,solve:!fir.boxproc<() -> ()>}> + %1 = fir.field_index solve, !fir.type<_QFtestTmatrix{element:!fir.array<2x2xf32>,solve:!fir.boxproc<() -> ()>}> + %2 = fir.coordinate_of %0, %1 : (!fir.ref,solve:!fir.boxproc<() -> ()>}>>, !fir.field) -> !fir.ref ()>> + %3 = fir.emboxproc %arg0 : ((!fir.ref) -> !fir.ref) -> !fir.boxproc<(!fir.ref) -> !fir.ref> + %4 = fir.convert %3 : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> !fir.boxproc<() -> ()> + fir.store %4 to %2 : !fir.ref ()>> + %5 = fir.field_index solve, !fir.type<_QFtestTmatrix{element:!fir.array<2x2xf32>,solve:!fir.boxproc<() -> ()>}> + %6 = fir.coordinate_of %0, %5 : (!fir.ref,solve:!fir.boxproc<() -> ()>}>>, !fir.field) -> !fir.ref ()>> + %7 = fir.load %6 : !fir.ref ()>> + %8 = fir.convert %7 : (!fir.boxproc<() -> ()>) -> !fir.boxproc<(!fir.ref) -> !fir.ref> + %9 = fir.box_addr %8 : (!fir.boxproc<(!fir.ref) -> !fir.ref>) -> ((!fir.ref) -> !fir.ref) + %10 = fir.call %9(%arg1) : (!fir.ref) -> !fir.ref + return +} +``` + +--- + +# Testing + +The lowering part is tested with LIT tests in tree, but the execution tests are +useful for
full testing. + +LLVM IR testing is also helpful with the initial check. A C function pointer is +semantically equivalent to a Fortran procedure at the LLVM IR level, and a pointer +to a C function pointer is semantically equivalent to a Fortran procedure +pointer at the LLVM IR level. That is, a Fortran procedure will be converted to an +opaque pointer at the LLVM IR level, which is the same for a C function pointer; +a Fortran procedure pointer will be converted to an opaque pointer pointing to +an opaque pointer, which is the same for a pointer to a C function pointer. + +The tests should include the following: +- function result, subroutine/function arguments with varying types + - non-character scalar + - character (assumed-length and non-assumed-length) + - array (static and dynamic) + - character array + - derived type + - ... (polymorphic?) +- internal/external/module procedure or a C function as the target + - procedure pointer initialization + - procedure pointer assignment +- procedure pointer, procedure pointer target passed to a C function +- procedure pointer, procedure pointer target passed to a Fortran procedure +- procedure pointer component in derived types + +--- + +# Current TODOs +Current list of TODOs in lowering: +- `flang/lib/Lower/CallInterface.cpp:708`: not yet implemented: procedure pointer result not yet handled +- `flang/lib/Lower/CallInterface.cpp:961`: not yet implemented: procedure pointer arguments +- `flang/lib/Lower/CallInterface.cpp:993`: not yet implemented: procedure pointer results +- `flang/lib/Lower/ConvertExpr.cpp:1119`: not yet implemented: procedure pointer component in derived type assignment +- `flang/lib/Lower/ConvertType.cpp:228`: not yet implemented: procedure pointers +- `flang/lib/Lower/Bridge.cpp:2438`: not yet implemented: procedure pointer assignment +- `flang/lib/Lower/ConvertVariable.cpp:348`: not yet implemented: procedure pointer component default initialization +- `flang/lib/Lower/ConvertVariable.cpp:416`: not yet
implemented: procedure pointer globals +- `flang/lib/Lower/ConvertVariable.cpp:1459`: not yet implemented: procedure pointers +- `flang/lib/Lower/HostAssociations.cpp:162`: not yet implemented: capture procedure pointer in internal procedure +- lowering of procedure pointers in ASSOCIATED, NULL, and C_F_PROCPOINTER + +Current list of TODOs in code generation: + +NOTE: There are any number of possible implementations. + +- `flang/lib/Optimizer/CodeGen/TypeConverter.h:64` TODO: BoxProcType type conversion +- `flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp:136` not yet implemented: record type with a boxproc type +- fir.global for procedure pointers + +or + +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:2080` not yet implemented: fir.emboxproc codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:629` not yet implemented: fir.boxproc_host codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:1078` not yet implemented: fir.len_param_index codegen +- `flang/lib/Optimizer/CodeGen/CodeGen.cpp:3166` not yet implemented: fir.unboxproc codegen + +--- + +Resources: +- [1] Fortran standard diff --git a/flang/include/flang/Common/MathOptionsBase.def b/flang/include/flang/Common/MathOptionsBase.def new file mode 100644 index 0000000000000..64b3959a1c53e --- /dev/null +++ b/flang/include/flang/Common/MathOptionsBase.def @@ -0,0 +1,25 @@ +//===--- MathOptionsBase.def - Math options config ---------------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines math options. Users of this file must define +/// ENUM_MATHOPT macro to make use of this information. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef ENUM_MATHOPT +# error Define the ENUM_MATHOPT macro to handle lowering options +#endif + +/// Allow fusing FP operations (e.g. create FMAs from mul/add). +ENUM_MATHOPT(FPContractEnabled, unsigned, 1, 0) + +/// Permit floating point optimizations without regard to infinities. +ENUM_MATHOPT(NoHonorInfs, unsigned, 1, 0) + +#undef ENUM_MATHOPT diff --git a/flang/include/flang/Common/MathOptionsBase.h b/flang/include/flang/Common/MathOptionsBase.h new file mode 100644 index 0000000000000..7f8ebdbee1987 --- /dev/null +++ b/flang/include/flang/Common/MathOptionsBase.h @@ -0,0 +1,44 @@ +//===- MathOptionsBase.h - Math options config ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Options controlling mathematical computations generated in FIR. +/// This is intended to be header-only implementation without extra +/// dependencies so that multiple components can use it to exchange +/// math configuration. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_COMMON_MATHOPTIONSBASE_H +#define FORTRAN_COMMON_MATHOPTIONSBASE_H + +namespace Fortran::common { + +class MathOptionsBase { +public: +#define ENUM_MATHOPT(Name, Type, Bits, Default) \ + Type get##Name() const { return static_cast(Name); } \ + MathOptionsBase &set##Name(Type Value) { \ + Name = static_cast(Value); \ + return *this; \ + } +#include "flang/Common/MathOptionsBase.def" + + MathOptionsBase() { +#define ENUM_MATHOPT(Name, Type, Bits, Default) set##Name(Default); +#include "flang/Common/MathOptionsBase.def" + } + +private: +#define ENUM_MATHOPT(Name, Type, Bits, Default) unsigned Name : Bits; +#include "flang/Common/MathOptionsBase.def" +}; + +} // namespace Fortran::common + +#endif // FORTRAN_COMMON_MATHOPTIONSBASE_H diff --git a/flang/include/flang/Frontend/CompilerInvocation.h b/flang/include/flang/Frontend/CompilerInvocation.h index a5895451bb74a..58479c8418515 100644 --- a/flang/include/flang/Frontend/CompilerInvocation.h +++ b/flang/include/flang/Frontend/CompilerInvocation.h @@ -96,9 +96,10 @@ class CompilerInvocation : public CompilerInvocationBase { bool warnAsErr = false; - /// This flag controls the unparsing and is used to decide whether to print out - /// the semantically analyzed version of an object or expression or the plain - /// version that does not include any information from semantic analysis. + /// This flag controls the unparsing and is used to decide whether to print + /// out the semantically analyzed version of an object or expression or the + /// plain version that does not include any information from semantic + /// analysis. 
bool useAnalyzedObjectsForUnparse = true; // Fortran Dialect options diff --git a/flang/include/flang/Frontend/FrontendAction.h b/flang/include/flang/Frontend/FrontendAction.h index e6e268c875e80..266050084f4c0 100644 --- a/flang/include/flang/Frontend/FrontendAction.h +++ b/flang/include/flang/Frontend/FrontendAction.h @@ -135,7 +135,8 @@ class FrontendAction { } private: - template bool reportFatalErrors(const char (&message)[N]); + template + bool reportFatalErrors(const char (&message)[N]); }; } // namespace Fortran::frontend diff --git a/flang/include/flang/Frontend/FrontendActions.h b/flang/include/flang/Frontend/FrontendActions.h index 975aaa0b9da27..eb9dda75c516c 100644 --- a/flang/include/flang/Frontend/FrontendActions.h +++ b/flang/include/flang/Frontend/FrontendActions.h @@ -29,8 +29,12 @@ namespace Fortran::frontend { // TODO: This is a copy from f18.cpp. It doesn't really belong here and should // be moved to a more suitable place in future. struct MeasurementVisitor { - template bool Pre(const A &) { return true; } - template void Post(const A &) { + template + bool Pre(const A &) { + return true; + } + template + void Post(const A &) { ++objects; bytes += sizeof(A); } @@ -148,8 +152,8 @@ class PluginParseTreeAction : public PrescanAndSemaAction { /// \param extension The extension to use for the output file (ignored when /// the user decides to print to stdout via `-o -`) /// \return Null on error, ostream for the output file otherwise - std::unique_ptr createOutputFile( - llvm::StringRef extension); + std::unique_ptr + createOutputFile(llvm::StringRef extension); }; //===----------------------------------------------------------------------===// @@ -184,10 +188,10 @@ class DebugDumpAllAction : public PrescanAndSemaDebugAction { /// maintain some level of consistency/similarity between the drivers. 
enum class BackendActionTy { Backend_EmitAssembly, ///< Emit native assembly files - Backend_EmitObj, ///< Emit native object files - Backend_EmitBC, ///< Emit LLVM bitcode files - Backend_EmitLL, ///< Emit human-readable LLVM assembly - Backend_EmitMLIR ///< Emit MLIR files + Backend_EmitObj, ///< Emit native object files + Backend_EmitBC, ///< Emit LLVM bitcode files + Backend_EmitLL, ///< Emit human-readable LLVM assembly + Backend_EmitMLIR ///< Emit MLIR files }; /// Abstract base class for actions that generate code (MLIR, LLVM IR, assembly @@ -199,7 +203,7 @@ class CodeGenAction : public FrontendAction { void executeAction() override; /// Runs prescan, parsing, sema and lowers to MLIR. bool beginSourceFileAction() override; - /// Sets up LLVM's TargetMachine, configures llvmModule accordingly. + /// Sets up LLVM's TargetMachine. void setUpTargetMachine(); /// Runs the optimization (aka middle-end) pipeline on the LLVM module /// associated with this action. diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index c4d0ec5329b2e..024db6109d6a1 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -21,5 +21,18 @@ LANGOPT(Name, Bits, Default) ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Off) ///< FP Contract Mode (off/fast) +/// Permit floating point optimization without regard to infinities +LANGOPT(NoHonorInfs, 1, false) +/// Permit floating point optimization without regard to NaN +LANGOPT(NoHonorNaNs, 1, false) +/// Allow math functions to be replaced with an approximately equivalent calculation +LANGOPT(ApproxFunc, 1, false) +/// Allow optimizations that ignore the sign of floating point zeros +LANGOPT(NoSignedZeros, 1, false) +/// Allow reassociation transformations for floating-point instructions +LANGOPT(AssociativeMath, 1, false) +/// Allow division operations to be reassociated +LANGOPT(ReciprocalMath, 1, false) + #undef LANGOPT #undef 
ENUM_LANGOPT diff --git a/flang/include/flang/Frontend/TextDiagnosticBuffer.h b/flang/include/flang/Frontend/TextDiagnosticBuffer.h index fb1028a36ea35..7eba843661328 100644 --- a/flang/include/flang/Frontend/TextDiagnosticBuffer.h +++ b/flang/include/flang/Frontend/TextDiagnosticBuffer.h @@ -45,7 +45,7 @@ class TextDiagnosticBuffer : public clang::DiagnosticConsumer { public: void HandleDiagnostic(clang::DiagnosticsEngine::Level diagLevel, - const clang::Diagnostic &info) override; + const clang::Diagnostic &info) override; /// Flush the buffered diagnostics to a given diagnostic engine. void flushDiagnostics(clang::DiagnosticsEngine &diags) const; diff --git a/flang/include/flang/Frontend/TextDiagnosticPrinter.h b/flang/include/flang/Frontend/TextDiagnosticPrinter.h index 3e6e6a1977d1d..0e092a0a012e0 100644 --- a/flang/include/flang/Frontend/TextDiagnosticPrinter.h +++ b/flang/include/flang/Frontend/TextDiagnosticPrinter.h @@ -51,7 +51,7 @@ class TextDiagnosticPrinter : public clang::DiagnosticConsumer { void setPrefix(std::string value) { prefix = std::move(value); } void HandleDiagnostic(clang::DiagnosticsEngine::Level level, - const clang::Diagnostic &info) override; + const clang::Diagnostic &info) override; }; } // namespace Fortran::frontend diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def new file mode 100644 index 0000000000000..2a89308467fd9 --- /dev/null +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -0,0 +1,35 @@ +//===--- LoweringOptions.def - Lowering options database ---------- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the lowering options. 
Users of this file must define +/// LOWERINGOPT macro to make use of this information. +/// +//===----------------------------------------------------------------------===// + +#ifndef LOWERINGOPT +# error Define the LOWERINGOPT macro to handle lowering options +#endif + +#ifndef ENUM_LOWERINGOPT +# define ENUM_LOWERINGOPT(Name, Type, Bits, Default) \ +LOWERINGOPT(Name, Bits, Default) +#endif + +/// If true, lower transpose without a runtime call. +ENUM_LOWERINGOPT(OptimizeTranspose, unsigned, 1, 1) + +/// If true, enable polymorphic type lowering feature. Off by default. +ENUM_LOWERINGOPT(PolymorphicTypeImpl, unsigned, 1, 0) + +/// If true, lower to High level FIR before lowering to FIR. +/// Off by default until fully ready. +ENUM_LOWERINGOPT(LowerToHighLevelFIR, unsigned, 1, 0) + +#undef LOWERINGOPT +#undef ENUM_LOWERINGOPT diff --git a/flang/include/flang/Lower/LoweringOptions.h b/flang/include/flang/Lower/LoweringOptions.h index d882ff0fb233a..8105ccd7ef6b1 100644 --- a/flang/include/flang/Lower/LoweringOptions.h +++ b/flang/include/flang/Lower/LoweringOptions.h @@ -15,41 +15,45 @@ #ifndef FLANG_LOWER_LOWERINGOPTIONS_H #define FLANG_LOWER_LOWERINGOPTIONS_H -namespace Fortran::lower { +#include "flang/Common/MathOptionsBase.h" -class LoweringOptions { - /// If true, lower transpose without a runtime call. - unsigned optimizeTranspose : 1; +namespace Fortran::lower { - /// If true, enable polymorphic type lowering feature. Off by default. - unsigned polymorphicTypeImpl : 1; +class LoweringOptionsBase { +public: +#define LOWERINGOPT(Name, Bits, Default) unsigned Name : Bits; +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) +#include "flang/Lower/LoweringOptions.def" + +protected: +#define LOWERINGOPT(Name, Bits, Default) +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) unsigned Name : Bits; +#include "flang/Lower/LoweringOptions.def" +}; - /// If true, lower to High level FIR before lowering to FIR. - /// Off by default until fully ready. 
- unsigned lowerToHighLevelFIR : 1; +class LoweringOptions : public LoweringOptionsBase { public: - LoweringOptions() - : optimizeTranspose(true), polymorphicTypeImpl(false), - lowerToHighLevelFIR(false) {} - - bool getOptimizeTranspose() const { return optimizeTranspose; } - LoweringOptions &setOptimizeTranspose(bool v) { - optimizeTranspose = v; - return *this; +#define LOWERINGOPT(Name, Bits, Default) +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) \ + Type get##Name() const { return static_cast(Name); } \ + LoweringOptions &set##Name(Type Value) { \ + Name = static_cast(Value); \ + return *this; \ } +#include "flang/Lower/LoweringOptions.def" - bool isPolymorphicTypeImplEnabled() const { return polymorphicTypeImpl; } - LoweringOptions &setPolymorphicTypeImpl(bool v) { - polymorphicTypeImpl = v; - return *this; - } + LoweringOptions(); - bool getLowerToHighLevelFIR() const { return lowerToHighLevelFIR; } - LoweringOptions &setLowerToHighLevelFIR(bool v) { - lowerToHighLevelFIR = v; - return *this; + const Fortran::common::MathOptionsBase &getMathOptions() const { + return MathOptions; } + + Fortran::common::MathOptionsBase &getMathOptions() { return MathOptions; } + +private: + /// Options for handling/optimizing mathematical computations. + Fortran::common::MathOptionsBase MathOptions; }; } // namespace Fortran::lower diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h new file mode 100644 index 0000000000000..a3b20b7bbfecc --- /dev/null +++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h @@ -0,0 +1,29 @@ +//===- AliasAnalysis.h - Alias Analysis in FIR -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FIR_ANALYSIS_ALIASANALYSIS_H_ +#define FIR_ANALYSIS_ALIASANALYSIS_H_ + +#include "mlir/Analysis/AliasAnalysis.h" + +namespace fir { + +//===----------------------------------------------------------------------===// +// AliasAnalysis +//===----------------------------------------------------------------------===// +class AliasAnalysis { +public: + /// Given two values, return their aliasing behavior. + mlir::AliasResult alias(mlir::Value lhs, mlir::Value rhs); + + /// Return the modify-reference behavior of `op` on `location`. + mlir::ModRefResult getModRef(mlir::Operation *op, mlir::Value location); +}; +} // namespace fir + +#endif // FIR_ANALYSIS_ALIASANALYSIS_H_ diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index 49fc22e830b4e..a28ada96ecf7a 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -16,6 +16,7 @@ #ifndef FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H #define FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H +#include "flang/Common/MathOptionsBase.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Support/KindMapping.h" @@ -35,13 +36,22 @@ class BoxValue; /// Extends the MLIR OpBuilder to provide methods for building common FIR /// patterns. 
-class FirOpBuilder : public mlir::OpBuilder { +class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { public: explicit FirOpBuilder(mlir::Operation *op, const fir::KindMapping &kindMap) - : OpBuilder{op}, kindMap{kindMap} {} + : OpBuilder{op, /*listener=*/this}, kindMap{kindMap} {} explicit FirOpBuilder(mlir::OpBuilder &builder, const fir::KindMapping &kindMap) - : OpBuilder{builder}, kindMap{kindMap} {} + : OpBuilder{builder}, kindMap{kindMap} { + setListener(this); + } + + // The listener self-reference has to be updated in case of copy-construction. + FirOpBuilder(const FirOpBuilder &other) + : OpBuilder{other}, kindMap{other.kindMap}, fastMathFlags{ + other.fastMathFlags} { + setListener(this); + } /// Get the current Region of the insertion point. mlir::Region &getRegion() { return *getBlock()->getParent(); } @@ -393,11 +403,35 @@ class FirOpBuilder : public mlir::OpBuilder { mlir::Value ub, mlir::Value step, mlir::Type type); + /// Set default FastMathFlags value for all operations + /// supporting mlir::arith::FastMathAttr that will be created + /// by this builder. + void setFastMathFlags(mlir::arith::FastMathFlags flags) { + fastMathFlags = flags; + } + + /// Set default FastMathFlags value from the passed MathOptionsBase + /// config. + void setFastMathFlags(Fortran::common::MathOptionsBase options); + /// Dump the current function. (debug) LLVM_DUMP_METHOD void dumpFunc(); private: + /// Set attributes (e.g. FastMathAttr) to \p op operation + /// based on the current attributes setting. + void setCommonAttributes(mlir::Operation *op) const; + + /// FirOpBuilder hook for creating new operation. + void notifyOperationInserted(mlir::Operation *op) override { + setCommonAttributes(op); + } + const KindMapping &kindMap; + + /// FastMathFlags that need to be set for operations that support + /// mlir::arith::FastMathAttr. 
+ mlir::arith::FastMathFlags fastMathFlags{}; }; } // namespace fir diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Derived.h b/flang/include/flang/Optimizer/Builder/Runtime/Derived.h index 816d561d38913..239eab1d4e418 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Derived.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Derived.h @@ -17,7 +17,7 @@ class Location; namespace fir { class FirOpBuilder; class RecordType; -} +} // namespace fir namespace fir::runtime { diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt index 8f3d47cf6c8c6..d657e3f166903 100644 --- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt +++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt @@ -26,7 +26,7 @@ add_custom_target(flang-doc) set(dialect_doc_filename "FIRLangRef") set(LLVM_TARGET_DEFINITIONS FIROps.td) -tablegen(MLIR ${dialect_doc_filename}.md -gen-op-doc "-I${MLIR_MAIN_SRC_DIR}" "-I${MLIR_INCLUDE_DIR}") +tablegen(MLIR ${dialect_doc_filename}.md -gen-op-doc "-I${MLIR_INCLUDE_DIR}") set(GEN_DOC_FILE ${FLANG_BINARY_DIR}/docs/Dialect/${dialect_doc_filename}.md) add_custom_command( OUTPUT ${GEN_DOC_FILE} diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.h b/flang/include/flang/Optimizer/Dialect/FIRAttr.h index 92b3f7a8e6f62..f88d6c6a4f97f 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.h +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.h @@ -38,7 +38,7 @@ class ExactTypeAttr using Base::Base; using ValueType = mlir::Type; - static constexpr llvm::StringRef getAttrName() { return "instance"; } + static constexpr llvm::StringRef getAttrName() { return "type_is"; } static ExactTypeAttr get(mlir::Type value); mlir::Type getType() const; @@ -51,7 +51,7 @@ class SubclassAttr using Base::Base; using ValueType = mlir::Type; - static constexpr llvm::StringRef getAttrName() { return "subsumed"; } + static constexpr llvm::StringRef getAttrName() { return "class_is"; } 
static SubclassAttr get(mlir::Type value); mlir::Type getType() const; diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 12938525b7062..76a117d78e73e 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -651,10 +651,10 @@ def fir_SelectTypeOp : fir_SwitchTerminatorOp<"select_type"> { ```mlir fir.select_type %arg : !fir.box<()> [ - #fir.instance>, ^bb1(%0 : i32), - #fir.instance>, ^bb2(%2 : i32), - #fir.subsumed>, ^bb3(%2 : i32), - #fir.instance>, ^bb4(%1,%3 : i32,f32), + #fir.type_is>, ^bb1(%0 : i32), + #fir.type_is>, ^bb2(%2 : i32), + #fir.class_is>, ^bb3(%2 : i32), + #fir.type_is>, ^bb4(%1,%3 : i32,f32), unit, ^bb5] ``` }]; diff --git a/flang/include/flang/Optimizer/Dialect/FIRTypes.td b/flang/include/flang/Optimizer/Dialect/FIRTypes.td index 0d06e1d118ea7..eaf43a6e908aa 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRTypes.td +++ b/flang/include/flang/Optimizer/Dialect/FIRTypes.td @@ -635,7 +635,7 @@ def AnyAddressableLike : TypeConstraint, "any addressable">; def ArrayOrBoxOrRecord : TypeConstraint, + IsBaseBoxTypePred, fir_RecordType.predicate]>, "fir.box, fir.array or fir.type">; diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h index bbc50dcec2e73..12bd80e7abd4f 100644 --- a/flang/include/flang/Optimizer/Support/InitFIR.h +++ b/flang/include/flang/Optimizer/Support/InitFIR.h @@ -32,7 +32,7 @@ namespace fir::support { mlir::scf::SCFDialect, mlir::arith::ArithDialect, \ mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \ mlir::vector::VectorDialect, mlir::math::MathDialect, \ - mlir::complex::ComplexDialect + mlir::complex::ComplexDialect, mlir::DLTIDialect // The definitive list of dialects used by flang. 
#define FLANG_DIALECT_LIST \ diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 9c3c22ca8ad32..ad01b1235c537 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -625,28 +625,27 @@ class Symbol { bool IsSubprogram() const; bool IsFromModFile() const; bool HasExplicitInterface() const { - return common::visit(common::visitors{ - [](const SubprogramDetails &) { return true; }, - [](const SubprogramNameDetails &) { return true; }, - [&](const ProcEntityDetails &x) { - return attrs_.test(Attr::INTRINSIC) || - x.HasExplicitInterface(); - }, - [](const ProcBindingDetails &x) { - return x.symbol().HasExplicitInterface(); - }, - [](const UseDetails &x) { - return x.symbol().HasExplicitInterface(); - }, - [](const HostAssocDetails &x) { - return x.symbol().HasExplicitInterface(); - }, - [](const GenericDetails &x) { - return x.specific() && - x.specific()->HasExplicitInterface(); - }, - [](const auto &) { return false; }, - }, + return common::visit( + common::visitors{ + [](const SubprogramDetails &) { return true; }, + [](const SubprogramNameDetails &) { return true; }, + [&](const ProcEntityDetails &x) { + return attrs_.test(Attr::INTRINSIC) || x.HasExplicitInterface(); + }, + [](const ProcBindingDetails &x) { + return x.symbol().HasExplicitInterface(); + }, + [](const UseDetails &x) { + return x.symbol().HasExplicitInterface(); + }, + [](const HostAssocDetails &x) { + return x.symbol().HasExplicitInterface(); + }, + [](const GenericDetails &x) { + return x.specific() && x.specific()->HasExplicitInterface(); + }, + [](const auto &) { return false; }, + }, details_); } diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp index cf43bab6a5eb7..1795751fbf045 100644 --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -506,9 +506,7 @@ static std::optional CharacterizeProcedure( } return intrinsic; } - 
const semantics::ProcInterface &interface { - proc.interface() - }; + const semantics::ProcInterface &interface { proc.interface() }; if (const semantics::Symbol * interfaceSymbol{interface.symbol()}) { auto interface { CharacterizeProcedure(*interfaceSymbol, context, seenProcs) diff --git a/flang/lib/Evaluate/fold-integer.cpp b/flang/lib/Evaluate/fold-integer.cpp index bc4bd3b19ea21..603c4a46f9b52 100644 --- a/flang/lib/Evaluate/fold-integer.cpp +++ b/flang/lib/Evaluate/fold-integer.cpp @@ -763,20 +763,20 @@ Expr> FoldIntrinsicFunction( context, std::move(funcRef), &Scalar::IEOR, Scalar{}); } else if (name == "ishft") { return FoldElementalIntrinsic(context, std::move(funcRef), - ScalarFunc([&](const Scalar &i, - const Scalar &pos) -> Scalar { - auto posVal{static_cast(pos.ToInt64())}; - if (posVal < -i.bits) { - context.messages().Say( - "SHIFT=%d count for ishft is less than %d"_err_en_US, posVal, - -i.bits); - } else if (posVal > i.bits) { - context.messages().Say( - "SHIFT=%d count for ishft is greater than %d"_err_en_US, posVal, - i.bits); - } - return i.ISHFT(posVal); - })); + ScalarFunc( + [&](const Scalar &i, const Scalar &pos) -> Scalar { + auto posVal{static_cast(pos.ToInt64())}; + if (posVal < -i.bits) { + context.messages().Say( + "SHIFT=%d count for ishft is less than %d"_err_en_US, + posVal, -i.bits); + } else if (posVal > i.bits) { + context.messages().Say( + "SHIFT=%d count for ishft is greater than %d"_err_en_US, + posVal, i.bits); + } + return i.ISHFT(posVal); + })); } else if (name == "ishftc") { if (args.at(2)) { // SIZE= is present return FoldElementalIntrinsic(context, @@ -940,16 +940,15 @@ Expr> FoldIntrinsicFunction( })); } else if (name == "modulo") { return FoldElementalIntrinsic(context, std::move(funcRef), - ScalarFuncWithContext( - [](FoldingContext &context, const Scalar &x, - const Scalar &y) -> Scalar { - auto result{x.MODULO(y)}; - if (result.overflow) { - context.messages().Say( - "modulo() folding overflowed"_warn_en_US); - } - 
return result.value; - })); + ScalarFuncWithContext([](FoldingContext &context, + const Scalar &x, + const Scalar &y) -> Scalar { + auto result{x.MODULO(y)}; + if (result.overflow) { + context.messages().Say("modulo() folding overflowed"_warn_en_US); + } + return result.value; + })); } else if (name == "not") { return FoldElementalIntrinsic( context, std::move(funcRef), &Scalar::NOT); @@ -1062,16 +1061,15 @@ Expr> FoldIntrinsicFunction( })); } else if (name == "sign") { return FoldElementalIntrinsic(context, std::move(funcRef), - ScalarFunc( - [&context](const Scalar &j, const Scalar &k) -> Scalar { - typename Scalar::ValueWithOverflow result{j.SIGN(k)}; - if (result.overflow) { - context.messages().Say( - "sign(integer(kind=%d)) folding overflowed"_warn_en_US, - KIND); - } - return result.value; - })); + ScalarFunc([&context](const Scalar &j, + const Scalar &k) -> Scalar { + typename Scalar::ValueWithOverflow result{j.SIGN(k)}; + if (result.overflow) { + context.messages().Say( + "sign(integer(kind=%d)) folding overflowed"_warn_en_US, KIND); + } + return result.value; + })); } else if (name == "size") { if (auto shape{GetContextFreeShape(context, args[0])}) { if (auto &dimArg{args[1]}) { // DIM= is present, get one extent diff --git a/flang/lib/Evaluate/fold-real.cpp b/flang/lib/Evaluate/fold-real.cpp index 38ece3f21edd2..ef90d12446454 100644 --- a/flang/lib/Evaluate/fold-real.cpp +++ b/flang/lib/Evaluate/fold-real.cpp @@ -127,15 +127,15 @@ Expr> FoldIntrinsicFunction( ? 
common::RoundingMode::ToZero : common::RoundingMode::TiesAwayFromZero}; return FoldElementalIntrinsic(context, std::move(funcRef), - ScalarFunc([&name, &context, mode]( - const Scalar &x) -> Scalar { - ValueWithRealFlags> y{x.ToWholeNumber(mode)}; - if (y.flags.test(RealFlag::Overflow)) { - context.messages().Say( - "%s intrinsic folding overflow"_warn_en_US, name); - } - return y.value; - })); + ScalarFunc( + [&name, &context, mode](const Scalar &x) -> Scalar { + ValueWithRealFlags> y{x.ToWholeNumber(mode)}; + if (y.flags.test(RealFlag::Overflow)) { + context.messages().Say( + "%s intrinsic folding overflow"_warn_en_US, name); + } + return y.value; + })); } else if (name == "dim") { return FoldElementalIntrinsic(context, std::move(funcRef), ScalarFunc( diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 8841d5456045c..935586b118b17 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1232,6 +1232,14 @@ static const IntrinsicInterface intrinsicSubroutine[]{ {"stat", AnyInt, Rank::scalar, Optionality::optional, common::Intent::Out}}, {}, Rank::elemental, IntrinsicClass::atomicSubroutine}, + {"atomic_xor", + {{"atom", AtomicInt, Rank::atom, Optionality::required, + common::Intent::InOut}, + {"value", AnyInt, Rank::scalar, Optionality::required, + common::Intent::In}, + {"stat", AnyInt, Rank::scalar, Optionality::optional, + common::Intent::Out}}, + {}, Rank::elemental, IntrinsicClass::atomicSubroutine}, {"co_broadcast", {{"a", AnyData, Rank::anyOrAssumedRank, Optionality::required, common::Intent::InOut}, @@ -1373,7 +1381,7 @@ static const IntrinsicInterface intrinsicSubroutine[]{ }; // TODO: Intrinsic subroutine EVENT_QUERY -// TODO: Atomic intrinsic subroutines: ATOMIC_ADD &al. +// TODO: Atomic intrinsic subroutines: ATOMIC_ADD // TODO: Collective intrinsic subroutines: co_reduce // Finds a built-in derived type and returns it as a DynamicType. 
@@ -2761,7 +2769,8 @@ static bool ApplySpecificChecks(SpecificCall &call, FoldingContext &context) { } } else if (name == "associated") { return CheckAssociated(call, context); - } else if (name == "atomic_and" || name == "atomic_or") { + } else if (name == "atomic_and" || name == "atomic_or" || + name == "atomic_xor") { return CheckForCoindexedObject(context, call.arguments[2], name, "stat"); } else if (name == "atomic_cas") { return CheckForCoindexedObject(context, call.arguments[4], name, "stat"); diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index 4abca70acaba0..fac5f2c1a1f87 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -36,6 +36,7 @@ add_flang_library(flangFrontend MLIRTransforms MLIRLLVMToLLVMIRTranslation MLIRSCFToControlFlow + MLIRTargetLLVMIRImport ${dialect_libs} LINK_COMPONENTS diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp index 951b8d179bed0..0d01608434618 100644 --- a/flang/lib/Frontend/CompilerInstance.cpp +++ b/flang/lib/Frontend/CompilerInstance.cpp @@ -117,7 +117,8 @@ CompilerInstance::createOutputFileImpl(llvm::StringRef outputFilePath, std::unique_ptr os; std::error_code error; - os.reset(new llvm::raw_fd_ostream(outputFilePath, error, + os.reset(new llvm::raw_fd_ostream( + outputFilePath, error, (binary ? llvm::sys::fs::OF_None : llvm::sys::fs::OF_TextWithCRLF))); if (error) { return llvm::errorCodeToError(error); diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 3a64086be33d3..f2180145af714 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -379,12 +379,13 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, } // Set fortranForm based on options -ffree-form and -ffixed-form. 
- if (const auto *arg = args.getLastArg(clang::driver::options::OPT_ffixed_form, - clang::driver::options::OPT_ffree_form)) { + if (const auto *arg = + args.getLastArg(clang::driver::options::OPT_ffixed_form, + clang::driver::options::OPT_ffree_form)) { opts.fortranForm = arg->getOption().matches(clang::driver::options::OPT_ffixed_form) - ? FortranForm::FixedForm - : FortranForm::FreeForm; + ? FortranForm::FixedForm + : FortranForm::FreeForm; } // Set fixedFormColumns based on -ffixed-line-length= @@ -425,22 +426,26 @@ static bool parseFrontendArgs(FrontendOptions &opts, llvm::opt::ArgList &args, opts.features.Enable( Fortran::common::LanguageFeature::ImplicitNoneTypeAlways, args.hasFlag(clang::driver::options::OPT_fimplicit_none, - clang::driver::options::OPT_fno_implicit_none, false)); + clang::driver::options::OPT_fno_implicit_none, false)); // -f{no-}backslash opts.features.Enable(Fortran::common::LanguageFeature::BackslashEscapes, - args.hasFlag(clang::driver::options::OPT_fbackslash, - clang::driver::options::OPT_fno_backslash, false)); + args.hasFlag(clang::driver::options::OPT_fbackslash, + clang::driver::options::OPT_fno_backslash, + false)); // -f{no-}logical-abbreviations - opts.features.Enable(Fortran::common::LanguageFeature::LogicalAbbreviations, + opts.features.Enable( + Fortran::common::LanguageFeature::LogicalAbbreviations, args.hasFlag(clang::driver::options::OPT_flogical_abbreviations, - clang::driver::options::OPT_fno_logical_abbreviations, false)); + clang::driver::options::OPT_fno_logical_abbreviations, + false)); // -f{no-}xor-operator - opts.features.Enable(Fortran::common::LanguageFeature::XOROperator, + opts.features.Enable( + Fortran::common::LanguageFeature::XOROperator, args.hasFlag(clang::driver::options::OPT_fxor_operator, - clang::driver::options::OPT_fno_xor_operator, false)); + clang::driver::options::OPT_fno_xor_operator, false)); // -fno-automatic if (args.hasArg(clang::driver::options::OPT_fno_automatic)) { @@ -494,11 +499,11 @@ 
static std::string getOpenMPHeadersDir() { /// /// \param [in] opts The preprocessor options instance /// \param [out] args The list of input arguments -static void parsePreprocessorArgs( - Fortran::frontend::PreprocessorOptions &opts, llvm::opt::ArgList &args) { +static void parsePreprocessorArgs(Fortran::frontend::PreprocessorOptions &opts, + llvm::opt::ArgList &args) { // Add macros from the command line. - for (const auto *currentArg : args.filtered( - clang::driver::options::OPT_D, clang::driver::options::OPT_U)) { + for (const auto *currentArg : args.filtered(clang::driver::options::OPT_D, + clang::driver::options::OPT_U)) { if (currentArg->getOption().matches(clang::driver::options::OPT_D)) { opts.addMacroDef(currentArg->getValue()); } else { @@ -513,7 +518,7 @@ static void parsePreprocessorArgs( // Prepend the ordered list of -intrinsic-modules-path // to the default location to search. for (const auto *currentArg : - args.filtered(clang::driver::options::OPT_fintrinsic_modules_path)) + args.filtered(clang::driver::options::OPT_fintrinsic_modules_path)) opts.searchDirectoriesFromIntrModPath.emplace_back(currentArg->getValue()); // -cpp/-nocpp @@ -521,8 +526,8 @@ static void parsePreprocessorArgs( clang::driver::options::OPT_cpp, clang::driver::options::OPT_nocpp)) opts.macrosFlag = (currentArg->getOption().matches(clang::driver::options::OPT_cpp)) - ? PPMacrosFlag::Include - : PPMacrosFlag::Exclude; + ? PPMacrosFlag::Include + : PPMacrosFlag::Exclude; opts.noReformat = args.hasArg(clang::driver::options::OPT_fno_reformat); opts.noLineDirectives = args.hasArg(clang::driver::options::OPT_P); @@ -531,7 +536,7 @@ static void parsePreprocessorArgs( /// Parses all semantic related arguments and populates the variables /// options accordingly. Returns false if new errors are generated. 
static bool parseSemaArgs(CompilerInvocation &res, llvm::opt::ArgList &args, - clang::DiagnosticsEngine &diags) { + clang::DiagnosticsEngine &diags) { unsigned numErrorsBefore = diags.getNumErrors(); // -J/module-dir option @@ -542,7 +547,7 @@ static bool parseSemaArgs(CompilerInvocation &res, llvm::opt::ArgList &args, if (moduleDirList.size() > 1) { const unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error, - "Only one '-module-dir/-J' option allowed"); + "Only one '-module-dir/-J' option allowed"); diags.Report(diagID); } if (moduleDirList.size() == 1) @@ -570,7 +575,7 @@ static bool parseSemaArgs(CompilerInvocation &res, llvm::opt::ArgList &args, /// Parses all diagnostics related arguments and populates the variables /// options accordingly. Returns false if new errors are generated. static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, - clang::DiagnosticsEngine &diags) { + clang::DiagnosticsEngine &diags) { unsigned numErrorsBefore = diags.getNumErrors(); // -Werror option @@ -583,7 +588,7 @@ static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } else { const unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error, - "Only `-Werror` is supported currently."); + "Only `-Werror` is supported currently."); diags.Report(diagID); } } @@ -598,7 +603,7 @@ static bool parseDiagArgs(CompilerInvocation &res, llvm::opt::ArgList &args, /// Parses all Dialect related arguments and populates the variables /// options accordingly. Returns false if new errors are generated. 
static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, - clang::DiagnosticsEngine &diags) { + clang::DiagnosticsEngine &diags) { unsigned numErrorsBefore = diags.getNumErrors(); // -fdefault* family @@ -615,9 +620,9 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, if (!args.hasArg(clang::driver::options::OPT_fdefault_real_8)) { // -fdefault-double-8 has to be used with -fdefault-real-8 // to be compatible with gfortran - const unsigned diagID = - diags.getCustomDiagID(clang::DiagnosticsEngine::Error, - "Use of `-fdefault-double-8` requires `-fdefault-real-8`"); + const unsigned diagID = diags.getCustomDiagID( + clang::DiagnosticsEngine::Error, + "Use of `-fdefault-double-8` requires `-fdefault-real-8`"); diags.Report(diagID); } // https://gcc.gnu.org/onlinedocs/gfortran/Fortran-Dialect-Options.html @@ -651,7 +656,7 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, } else { const unsigned diagID = diags.getCustomDiagID(clang::DiagnosticsEngine::Error, - "Only -std=f2018 is allowed currently."); + "Only -std=f2018 is allowed currently."); diags.Report(diagID); } } @@ -691,6 +696,42 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.setFPContractMode(fpContractMode); } + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_menable_no_infinities)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.NoHonorInfs = true; + } + + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_menable_no_nans)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.NoHonorNaNs = true; + } + + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_fapprox_func)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.ApproxFunc = true; + } + + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_fno_signed_zeros)) { + 
diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.NoSignedZeros = true; + } + + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_mreassociate)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.AssociativeMath = true; + } + + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_freciprocal_math)) { + diags.Report(diagUnimplemented) << a->getOption().getName(); + opts.ReciprocalMath = true; + } + return true; } @@ -806,12 +847,12 @@ void CompilerInvocation::setDefaultPredefinitions() { // Populate the macro list with version numbers and other predefinitions. fortranOptions.predefinitions.emplace_back("__flang__", "1"); - fortranOptions.predefinitions.emplace_back( - "__flang_major__", FLANG_VERSION_MAJOR_STRING); - fortranOptions.predefinitions.emplace_back( - "__flang_minor__", FLANG_VERSION_MINOR_STRING); - fortranOptions.predefinitions.emplace_back( - "__flang_patchlevel__", FLANG_VERSION_PATCHLEVEL_STRING); + fortranOptions.predefinitions.emplace_back("__flang_major__", + FLANG_VERSION_MAJOR_STRING); + fortranOptions.predefinitions.emplace_back("__flang_minor__", + FLANG_VERSION_MINOR_STRING); + fortranOptions.predefinitions.emplace_back("__flang_patchlevel__", + FLANG_VERSION_PATCHLEVEL_STRING); // Add predefinitions based on extensions enabled if (frontendOptions.features.IsEnabled( @@ -903,8 +944,18 @@ void CompilerInvocation::setSemanticsOpts( /// Set \p loweringOptions controlling lowering behavior based /// on the \p optimizationLevel. void CompilerInvocation::setLoweringOptions() { - const auto &codegenOpts = getCodeGenOpts(); + const CodeGenOptions &codegenOpts = getCodeGenOpts(); // Lower TRANSPOSE as a runtime call under -O0. 
loweringOpts.setOptimizeTranspose(codegenOpts.OptimizationLevel > 0); + + const LangOptions &langOptions = getLangOpts(); + Fortran::common::MathOptionsBase &mathOpts = loweringOpts.getMathOptions(); + // TODO: when LangOptions are finalized, we can represent + // the math related options using Fortran::common::MathOptionsBase, + // so that we can just copy it into LoweringOptions. + mathOpts + .setFPContractEnabled(langOptions.getFPContractMode() == + LangOptions::FPM_Fast) + .setNoHonorInfs(langOptions.NoHonorInfs); } diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index dfd2089cb9f93..9042332822c8c 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -34,6 +34,8 @@ #include "mlir/IR/Dialect.h" #include "mlir/Parser/Parser.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Target/LLVMIR/Import.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticFrontend.h" @@ -79,6 +81,16 @@ bool PrescanAndSemaDebugAction::beginSourceFileAction() { (generateRtTypeTables() || true); } +static void setMLIRDataLayout(mlir::ModuleOp &mlirModule, + const llvm::DataLayout &dl) { + mlir::MLIRContext *context = mlirModule.getContext(); + mlirModule->setAttr( + mlir::LLVM::LLVMDialect::getDataLayoutAttrName(), + mlir::StringAttr::get(context, dl.getStringRepresentation())); + mlir::DataLayoutSpecInterface dlSpec = mlir::translateDataLayout(dl, context); + mlirModule->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec); +} + bool CodeGenAction::beginSourceFileAction() { llvmCtx = std::make_unique(); CompilerInstance &ci = this->getInstance(); @@ -123,6 +135,9 @@ bool CodeGenAction::beginSourceFileAction() { } mlirModule = std::make_unique(module.release()); + setUpTargetMachine(); + const llvm::DataLayout &dl = tm->createDataLayout(); + setMLIRDataLayout(*mlirModule, dl); return true; } @@
-152,10 +167,15 @@ bool CodeGenAction::beginSourceFileAction() { kindMap, ci.getInvocation().getLoweringOpts(), ci.getInvocation().getFrontendOpts().envDefaults); + // Fetch module from lb, so we can set its data layout + mlirModule = std::make_unique(lb.getModule()); + setUpTargetMachine(); + const llvm::DataLayout &dl = tm->createDataLayout(); + setMLIRDataLayout(*mlirModule, dl); + // Create a parse tree and lower it to FIR Fortran::parser::Program &parseTree{*ci.getParsing().parseTree()}; lb.lower(parseTree, ci.getInvocation().getSemanticsContext()); - mlirModule = std::make_unique(lb.getModule()); // run the default passes. mlir::PassManager pm(mlirCtx.get(), mlir::OpPassManager::Nesting::Implicit); @@ -565,13 +585,7 @@ getCGOptLevel(const Fortran::frontend::CodeGenOptions &opts) { void CodeGenAction::setUpTargetMachine() { CompilerInstance &ci = this->getInstance(); - // Set the triple based on the CompilerInvocation set-up const std::string &theTriple = ci.getInvocation().getTargetOpts().triple; - if (llvmModule->getTargetTriple() != theTriple) { - ci.getDiagnostics().Report(clang::diag::warn_fe_override_module) - << theTriple; - llvmModule->setTargetTriple(theTriple); - } // Create `Target` std::string error; @@ -735,6 +749,22 @@ void CodeGenAction::executeAction() { if (!llvmModule) generateLLVMIR(); + // Set the triple based on the targetmachine (this comes from the compiler + // invocation and the command-line target option if specified, or the default + // if not given on the command-line). + setUpTargetMachine(); + const std::string &theTriple = tm->getTargetTriple().str(); + + if (llvmModule->getTargetTriple() != theTriple) { + ci.getDiagnostics().Report(clang::diag::warn_fe_override_module) + << theTriple; + } + // Always set the triple and data layout, to make sure they match and are set. + // Note that this overwrites any datalayout stored in the LLVM-IR. This avoids + // an assert for incompatible data layout when the code-generation happens.
+ llvmModule->setTargetTriple(theTriple); + llvmModule->setDataLayout(tm->createDataLayout()); + // Run LLVM's middle-end (i.e. the optimizer). runOptimizationPipeline(*os); @@ -744,9 +774,6 @@ void CodeGenAction::executeAction() { return; } - setUpTargetMachine(); - llvmModule->setDataLayout(tm->createDataLayout()); - if (action == BackendActionTy::Backend_EmitBC) { // This action has effectively been completed in runOptimizationPipeline. return; diff --git a/flang/lib/Frontend/FrontendOptions.cpp b/flang/lib/Frontend/FrontendOptions.cpp index 8353858ff5094..504fac6cd6fb9 100644 --- a/flang/lib/Frontend/FrontendOptions.cpp +++ b/flang/lib/Frontend/FrontendOptions.cpp @@ -17,22 +17,23 @@ using namespace Fortran::frontend; bool Fortran::frontend::isFixedFormSuffix(llvm::StringRef suffix) { // Note: Keep this list in-sync with flang/test/lit.cfg.py return suffix == "f77" || suffix == "f" || suffix == "F" || suffix == "ff" || - suffix == "for" || suffix == "FOR" || suffix == "fpp" || suffix == "FPP"; + suffix == "for" || suffix == "FOR" || suffix == "fpp" || + suffix == "FPP"; } bool Fortran::frontend::isFreeFormSuffix(llvm::StringRef suffix) { // Note: Keep this list in-sync with flang/test/lit.cfg.py // TODO: Add Cuda Fortan files (i.e. `*.cuf` and `*.CUF`). 
return suffix == "f90" || suffix == "F90" || suffix == "ff90" || - suffix == "f95" || suffix == "F95" || suffix == "ff95" || - suffix == "f03" || suffix == "F03" || suffix == "f08" || - suffix == "F08" || suffix == "f18" || suffix == "F18"; + suffix == "f95" || suffix == "F95" || suffix == "ff95" || + suffix == "f03" || suffix == "F03" || suffix == "f08" || + suffix == "F08" || suffix == "f18" || suffix == "F18"; } bool Fortran::frontend::isToBePreprocessed(llvm::StringRef suffix) { return suffix == "F" || suffix == "FOR" || suffix == "fpp" || - suffix == "FPP" || suffix == "F90" || suffix == "F95" || - suffix == "F03" || suffix == "F08" || suffix == "F18"; + suffix == "FPP" || suffix == "F90" || suffix == "F95" || + suffix == "F03" || suffix == "F08" || suffix == "F18"; } InputKind FrontendOptions::getInputKindForExtension(llvm::StringRef extension) { diff --git a/flang/lib/Frontend/TextDiagnosticPrinter.cpp b/flang/lib/Frontend/TextDiagnosticPrinter.cpp index 12c41d77ba467..7ae19645e40a4 100644 --- a/flang/lib/Frontend/TextDiagnosticPrinter.cpp +++ b/flang/lib/Frontend/TextDiagnosticPrinter.cpp @@ -46,7 +46,7 @@ void TextDiagnosticPrinter::HandleDiagnostic( // We only emit diagnostics in contexts that lack valid source locations. 
assert(!info.getLocation().isValid() && - "Diagnostics with valid source location are not supported"); + "Diagnostics with valid source location are not supported"); Fortran::frontend::TextDiagnostic::printDiagnosticLevel(os, level, diagOpts->ShowColors); diff --git a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 54cbd2c99e4a0..b99d2b7196da3 100644 --- a/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -82,7 +82,7 @@ createFrontendAction(CompilerInstance &ci) { return std::make_unique(); case PluginAction: { for (const FrontendPluginRegistry::entry &plugin : - FrontendPluginRegistry::entries()) { + FrontendPluginRegistry::entries()) { if (plugin.getName() == ci.getFrontendOpts().actionName) { std::unique_ptr p(plugin.instantiate()); return std::move(p); @@ -101,8 +101,9 @@ createFrontendAction(CompilerInstance &ci) { bool executeCompilerInvocation(CompilerInstance *flang) { // Honor -help. if (flang->getFrontendOpts().showHelp) { - clang::driver::getDriverOptTable().printHelp(llvm::outs(), - "flang-new -fc1 [options] file...", "LLVM 'Flang' Compiler", + clang::driver::getDriverOptTable().printHelp( + llvm::outs(), "flang-new -fc1 [options] file...", + "LLVM 'Flang' Compiler", /*Include=*/clang::driver::options::FC1Option, /*Exclude=*/llvm::opt::DriverFlag::HelpHidden, /*ShowAllAliases=*/false); diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 190fe1a698f3c..65f7e9c75b53b 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -482,6 +482,10 @@ class AllocateStmtHelper { if (!typeSpec) typeSpec = &alloc.type; + // Do not generate calls for non derived-type type spec. 
+ if (!typeSpec->AsDerived()) + return; + assert(typeSpec && "type spec missing for polymorphic allocation"); std::string typeName = Fortran::lower::mangle::mangleName(typeSpec->derivedTypeSpec()); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index e9490b80566fe..6ab001b850fd2 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2884,6 +2884,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::func::FuncOp func = callee.addEntryBlockAndMapArguments(); builder = new fir::FirOpBuilder(func, bridge.getKindMap()); assert(builder && "FirOpBuilder did not instantiate"); + builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions()); builder->setInsertionPointToStart(&func.front()); func.setVisibility(mlir::SymbolTable::Visibility::Public); @@ -3087,6 +3088,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::FunctionType::get(context, llvm::None, llvm::None)); func.addEntryBlock(); builder = new fir::FirOpBuilder(func, bridge.getKindMap()); + assert(builder && "FirOpBuilder did not instantiate"); + builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions()); createGlobals(); if (mlir::Region *region = func.getCallableRegion()) region->dropAllReferences(); diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index cfc2e28aee344..183bf6478e75c 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -17,6 +17,7 @@ add_flang_library(FortranLower IntrinsicCall.cpp IO.cpp IterationSpace.cpp + LoweringOptions.cpp Mangler.cpp OpenACC.cpp OpenMP.cpp diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index 190c5619dface..0832f101d4bb2 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -829,7 +829,7 @@ class Fortran::lower::CallInterfaceImpl { if (cat == Fortran::common::TypeCategory::Derived) { // TODO is kept under experimental flag until feature 
is complete. if (dynamicType.IsPolymorphic() && - !getConverter().getLoweringOptions().isPolymorphicTypeImplEnabled()) + !getConverter().getLoweringOptions().getPolymorphicTypeImpl()) TODO(interface.converter.getCurrentLocation(), "support for polymorphic types"); @@ -929,13 +929,15 @@ class Fortran::lower::CallInterfaceImpl { PassEntityBy passBy = PassEntityBy::BaseAddress; Property prop = Property::BaseAddress; if (isValueAttr) { + bool isBuiltinCptrType = fir::isa_builtin_cptr_type(type); if (isBindC || (!type.isa() && !obj.attrs.test(Attrs::Optional) && - dynamicType.category() != - Fortran::common::TypeCategory::Derived)) { + (dynamicType.category() != + Fortran::common::TypeCategory::Derived || + isBuiltinCptrType))) { passBy = PassEntityBy::Value; prop = Property::Value; - if (fir::isa_builtin_cptr_type(type)) { + if (isBuiltinCptrType) { auto recTy = type.dyn_cast(); mlir::Type fieldTy = recTy.getTypeList()[0].second; passType = fir::ReferenceType::get(fieldTy); diff --git a/flang/lib/Lower/ConvertType.cpp b/flang/lib/Lower/ConvertType.cpp index 1d838df2022a9..e9a2e339e7876 100644 --- a/flang/lib/Lower/ConvertType.cpp +++ b/flang/lib/Lower/ConvertType.cpp @@ -234,8 +234,7 @@ struct TypeBuilder { translateLenParameters(params, tySpec->category(), ultimate); ty = genFIRType(context, tySpec->category(), kind, params); } else if (type->IsPolymorphic() && - !converter.getLoweringOptions() - .isPolymorphicTypeImplEnabled()) { + !converter.getLoweringOptions().getPolymorphicTypeImpl()) { // TODO is kept under experimental flag until feature is complete. TODO(loc, "support for polymorphic types"); } else if (type->IsUnlimitedPolymorphic()) { diff --git a/flang/lib/Lower/IntrinsicCall.cpp b/flang/lib/Lower/IntrinsicCall.cpp index 0184352336684..abd31558aa05d 100644 --- a/flang/lib/Lower/IntrinsicCall.cpp +++ b/flang/lib/Lower/IntrinsicCall.cpp @@ -5004,7 +5004,7 @@ Fortran::lower::getIntrinsicArgumentLowering(llvm::StringRef specificName) { /// intrinsic function. 
Fortran::lower::ArgLoweringRule Fortran::lower::lowerIntrinsicArgumentAs( const IntrinsicArgumentLoweringRules &rules, unsigned position) { - assert(position < sizeof(rules.args) / sizeof(decltype(*rules.args)) && + assert(position < sizeof(rules.args) / (sizeof(decltype(*rules.args))) && "invalid argument"); return {rules.args[position].lowerAs, rules.args[position].handleDynamicOptional}; diff --git a/flang/lib/Lower/LoweringOptions.cpp b/flang/lib/Lower/LoweringOptions.cpp new file mode 100644 index 0000000000000..9456abf0e8dea --- /dev/null +++ b/flang/lib/Lower/LoweringOptions.cpp @@ -0,0 +1,23 @@ +//===--- LoweringOptions.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "flang/Lower/LoweringOptions.h" + +namespace Fortran::lower { + +LoweringOptions::LoweringOptions() : MathOptions{} { +#define LOWERINGOPT(Name, Bits, Default) Name = Default; +#define ENUM_LOWERINGOPT(Name, Type, Bits, Default) set##Name(Default); +#include "flang/Lower/LoweringOptions.def" +} + +} // namespace Fortran::lower diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp index 62ec5adf7758e..19b2512dc9224 100644 --- a/flang/lib/Lower/PFTBuilder.cpp +++ b/flang/lib/Lower/PFTBuilder.cpp @@ -900,12 +900,22 @@ class PFTBuilder { }, [&](const parser::SelectRankStmt &s) { insertConstructName(s, parentConstruct); + lastConstructStmtEvaluation = &eval; + }, + [&](const parser::SelectRankCaseStmt &) { + eval.isNewBlock = true; + lastConstructStmtEvaluation->controlSuccessor = &eval; + 
lastConstructStmtEvaluation = &eval; }, - [&](const parser::SelectRankCaseStmt &) { eval.isNewBlock = true; }, [&](const parser::SelectTypeStmt &s) { insertConstructName(s, parentConstruct); + lastConstructStmtEvaluation = &eval; + }, + [&](const parser::TypeGuardStmt &) { + eval.isNewBlock = true; + lastConstructStmtEvaluation->controlSuccessor = &eval; + lastConstructStmtEvaluation = &eval; }, - [&](const parser::TypeGuardStmt &) { eval.isNewBlock = true; }, // Constructs - set (unstructured) construct exit targets [&](const parser::AssociateConstruct &) { setConstructExit(eval); }, diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp new file mode 100644 index 0000000000000..85f4743d53c67 --- /dev/null +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -0,0 +1,67 @@ +//===- AliasAnalysis.cpp - Alias Analysis for FIR ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Analysis/AliasAnalysis.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" + +using namespace mlir; + +//===----------------------------------------------------------------------===// +// AliasAnalysis: alias +//===----------------------------------------------------------------------===// + +namespace fir { +AliasResult AliasAnalysis::alias(Value lhs, Value rhs) { + // This is for now a mock analysis + if (lhs == rhs) { + return AliasResult::MustAlias; + } + return AliasResult::MayAlias; +} + +//===----------------------------------------------------------------------===// +// AliasAnalysis: getModRef +//===----------------------------------------------------------------------===// + +/// This is mostly inspired by MLIR::LocalAliasAnalysis with 2 notable +/// differences 1) Regions are not handled here but will be handled by a data +/// flow analysis to come 2) Allocate and Free effects are considered modifying +ModRefResult AliasAnalysis::getModRef(Operation *op, Value location) { + MemoryEffectOpInterface interface = dyn_cast(op); + if (!interface) + return ModRefResult::getModAndRef(); + + // Build a ModRefResult by merging the behavior of the effects of this + // operation. + SmallVector effects; + interface.getEffects(effects); + + ModRefResult result = ModRefResult::getNoModRef(); + for (const MemoryEffects::EffectInstance &effect : effects) { + + // Check for an alias between the effect and our memory location. + AliasResult aliasResult = AliasResult::MayAlias; + if (Value effectValue = effect.getValue()) + aliasResult = alias(effectValue, location); + + // If we don't alias, ignore this effect. + if (aliasResult.isNo()) + continue; + + // Merge in the corresponding mod or ref for this effect. 
+ if (isa(effect.getEffect())) { + result = result.merge(ModRefResult::getRef()); + } else { + result = result.merge(ModRefResult::getMod()); + } + if (result.isModAndRef()) + break; + } + return result; +} +} // namespace fir diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt new file mode 100644 index 0000000000000..4ebe7d8c78c33 --- /dev/null +++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt @@ -0,0 +1,16 @@ +add_flang_library(FIRAnalysis + AliasAnalysis.cpp + + DEPENDS + FIRBuilder + FIRDialect + FIRSupport + + LINK_LIBS + FIRBuilder + FIRDialect + MLIRFuncDialect + MLIRLLVMDialect + MLIRMathTransforms + FIRSupport +) diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 521f469b4ea2f..50fc21b0f256b 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -571,6 +571,30 @@ mlir::Value fir::FirOpBuilder::genExtentFromTriplet(mlir::Location loc, return create(loc, cmp, div, zero); } +void fir::FirOpBuilder::setCommonAttributes(mlir::Operation *op) const { + auto fmi = mlir::dyn_cast(*op); + if (!fmi) + return; + // TODO: use fmi.setFastMathFlagsAttr() after D137114 is merged. + // For now set the attribute by the name. 
+ llvm::StringRef arithFMFAttrName = fmi.getFastMathAttrName(); + if (fastMathFlags != mlir::arith::FastMathFlags::none) + op->setAttr(arithFMFAttrName, mlir::arith::FastMathFlagsAttr::get( + op->getContext(), fastMathFlags)); +} + +void fir::FirOpBuilder::setFastMathFlags( + Fortran::common::MathOptionsBase options) { + mlir::arith::FastMathFlags arithFMF{}; + if (options.getFPContractEnabled()) { + arithFMF = arithFMF | mlir::arith::FastMathFlags::contract; + } + if (options.getNoHonorInfs()) { + arithFMF = arithFMF | mlir::arith::FastMathFlags::ninf; + } + setFastMathFlags(arithFMF); +} + //===--------------------------------------------------------------------===// // ExtendedValue inquiry helper implementation //===--------------------------------------------------------------------===// @@ -920,7 +944,7 @@ fir::ExtendedValue fir::factory::componentToExtendedValue( auto fieldTy = component.getType(); if (auto ty = fir::dyn_cast_ptrEleTy(fieldTy)) fieldTy = ty; - if (fieldTy.isa()) { + if (fieldTy.isa()) { llvm::SmallVector nonDeferredTypeParams; auto eleTy = fir::unwrapSequenceType(fir::dyn_cast_ptrOrBoxEleTy(fieldTy)); if (auto charTy = eleTy.dyn_cast()) { diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp index 2c6b1d05bed40..7773125919710 100644 --- a/flang/lib/Optimizer/Builder/MutableBox.cpp +++ b/flang/lib/Optimizer/Builder/MutableBox.cpp @@ -809,28 +809,29 @@ fir::factory::MutableBoxReallocation fir::factory::genReallocIfNeeded( TODO(loc, "automatic allocation of derived type allocatable with " "length parameters"); } - auto ifOp = - builder - .genIfOp(loc, {addrType}, mustReallocate, - /*withElseRegion=*/true) - .genThen([&]() { - // If shape or length mismatch, allocate new storage. - // When rhs is a scalar, keep the previous shape - auto extents = shape.empty() - ? 
mlir::ValueRange(previousExtents) - : shape; - auto heap = allocateAndInitNewStorage( - builder, loc, box, extents, lengthParams, - ".auto.alloc"); - if (storageHandler) - storageHandler(getExtValForStorage(heap)); - builder.create(loc, heap); - }) - .genElse([&]() { - if (storageHandler) - storageHandler(getExtValForStorage(addr)); - builder.create(loc, addr); - }); + auto ifOp = builder + .genIfOp(loc, {addrType}, mustReallocate, + /*withElseRegion=*/true) + .genThen([&]() { + // If shape or length mismatch, allocate new + // storage. When rhs is a scalar, keep the + // previous shape + auto extents = + shape.empty() + ? mlir::ValueRange(previousExtents) + : shape; + auto heap = allocateAndInitNewStorage( + builder, loc, box, extents, lengthParams, + ".auto.alloc"); + if (storageHandler) + storageHandler(getExtValForStorage(heap)); + builder.create(loc, heap); + }) + .genElse([&]() { + if (storageHandler) + storageHandler(getExtValForStorage(addr)); + builder.create(loc, addr); + }); ifOp.end(); auto newAddr = ifOp.getResults()[0]; builder.create( diff --git a/flang/lib/Optimizer/CMakeLists.txt b/flang/lib/Optimizer/CMakeLists.txt index 2320bf4f44270..4a602162ed2b7 100644 --- a/flang/lib/Optimizer/CMakeLists.txt +++ b/flang/lib/Optimizer/CMakeLists.txt @@ -4,3 +4,4 @@ add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Support) add_subdirectory(Transforms) +add_subdirectory(Analysis) diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index fae601b84671d..be49a0bf509be 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -915,12 +915,17 @@ struct DispatchOpConversion : public FIROpConversion { return emitError(loc) << "no binding tables found"; // Get derived type information. 
- auto declaredType = llvm::TypeSwitch( - dispatch.getObject().getType().getEleTy()) - .Case( - [](auto p) { return p.getEleTy(); }) - .Default([](mlir::Type t) { return t; }); - + auto declaredType = + llvm::TypeSwitch( + dispatch.getObject().getType().getEleTy()) + .Case( + [](auto p) { + if (auto seq = + p.getEleTy().template dyn_cast()) + return seq.getEleTy(); + return p.getEleTy(); + }) + .Default([](mlir::Type t) { return t; }); assert(declaredType.isa() && "expecting fir.type"); auto recordType = declaredType.dyn_cast(); std::string typeDescName = diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp index 6868d32840a2e..9bf51cc6ee1a4 100644 --- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp @@ -841,17 +841,17 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase { auto argNo = newInTys.size(); if (attr.isByVal()) { if (auto align = attr.getAlignment()) - fixups.emplace_back( - FixupTy::Codes::ArgumentAsLoad, argNo, - [=](mlir::func::FuncOp func) { - auto elemType = fir::dyn_cast_ptrOrBoxEleTy( - func.getFunctionType().getInput(argNo)); - func.setArgAttr(argNo, "llvm.byval", - mlir::TypeAttr::get(elemType)); - func.setArgAttr(argNo, "llvm.align", - rewriter->getIntegerAttr( - rewriter->getIntegerType(32), align)); - }); + fixups.emplace_back(FixupTy::Codes::ArgumentAsLoad, argNo, + [=](mlir::func::FuncOp func) { + auto elemType = fir::dyn_cast_ptrOrBoxEleTy( + func.getFunctionType().getInput(argNo)); + func.setArgAttr(argNo, "llvm.byval", + mlir::TypeAttr::get(elemType)); + func.setArgAttr( + argNo, "llvm.align", + rewriter->getIntegerAttr( + rewriter->getIntegerType(32), align)); + }); else fixups.emplace_back(FixupTy::Codes::ArgumentAsLoad, newInTys.size(), [=](mlir::func::FuncOp func) { diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 450900b351029..86628b792068b 100644 --- 
a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -2970,8 +2970,11 @@ void fir::SelectTypeOp::print(mlir::OpAsmPrinter &p) { } mlir::LogicalResult fir::SelectTypeOp::verify() { - if (!(getSelector().getType().isa())) - return emitOpError("must be a boxed type"); + if (!(getSelector().getType().isa())) + return emitOpError("must be a fir.class or fir.box type"); + if (auto boxType = getSelector().getType().dyn_cast()) + if (!boxType.getEleTy().isa()) + return emitOpError("selector must be polymorphic"); auto cases = getOperation()->getAttrOfType(getCasesAttr()).getValue(); auto count = getNumDest(); diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp index 67b4d1af7cf17..c509ce0fcdcfb 100644 --- a/flang/lib/Optimizer/Dialect/FIRType.cpp +++ b/flang/lib/Optimizer/Dialect/FIRType.cpp @@ -948,6 +948,10 @@ bool fir::hasAbstractResult(mlir::FunctionType ty) { if (ty.getNumResults() == 0) return false; auto resultType = ty.getResult(0); + // FIXME: The interoperable derived type needs more investigations and tests. + // The derived type without BIND attribute may also not be abstract result. 
+ if (fir::isa_builtin_cptr_type(resultType)) + return false; return resultType.isa(); } diff --git a/flang/lib/Semantics/check-acc-structure.h b/flang/lib/Semantics/check-acc-structure.h index d7de0c5b02eb4..fda626e57ba63 100644 --- a/flang/lib/Semantics/check-acc-structure.h +++ b/flang/lib/Semantics/check-acc-structure.h @@ -70,7 +70,6 @@ class AccStructureChecker #include "llvm/Frontend/OpenACC/ACC.inc" private: - bool CheckAllowedModifier(llvm::acc::Clause clause); bool IsComputeConstruct(llvm::acc::Directive directive) const; bool IsInsideComputeConstruct() const; diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index e0efaa7746a3e..85dbbb14e721a 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -1573,9 +1573,7 @@ void CheckHelper::CheckPassArg( return; } const auto &name{proc.name()}; - const Symbol *interface { - interface0 ? FindInterface(*interface0) : nullptr - }; + const Symbol *interface { interface0 ? 
FindInterface(*interface0) : nullptr }; if (!interface) { messages_.Say(name, "Procedure component '%s' must have NOPASS attribute or explicit interface"_err_en_US, diff --git a/flang/lib/Semantics/compute-offsets.cpp b/flang/lib/Semantics/compute-offsets.cpp index 237b6b6545d73..779afa6f0bc3d 100644 --- a/flang/lib/Semantics/compute-offsets.cpp +++ b/flang/lib/Semantics/compute-offsets.cpp @@ -174,8 +174,7 @@ void ComputeOffsetsHelper::DoCommonBlock(Symbol &commonBlock) { if (const auto *baseBlock{FindCommonBlockContaining(base)}) { if (baseBlock == &commonBlock) { if (base.offset() != symbol.offset() - dep.offset || - std::find(details.objects().begin(), details.objects().end(), - base) != details.objects().end()) { + llvm::is_contained(details.objects(), base)) { context_.Say(errorSite, "'%s' is storage associated with '%s' by EQUIVALENCE elsewhere in COMMON block /%s/"_err_en_US, symbol.name(), base.name(), commonBlock.name()); diff --git a/flang/lib/Semantics/data-to-inits.h b/flang/lib/Semantics/data-to-inits.h index d39a9a39bcc44..10d850d23d5d6 100644 --- a/flang/lib/Semantics/data-to-inits.h +++ b/flang/lib/Semantics/data-to-inits.h @@ -18,7 +18,7 @@ namespace Fortran::parser { struct DataStmtSet; struct DataStmtValue; -} +} // namespace Fortran::parser namespace Fortran::evaluate { class ExpressionAnalyzer; } diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 3853b2eebc6b4..182e83eeea944 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -2307,17 +2307,17 @@ auto ExpressionAnalyzer::GetCalleeAndArguments( const parser::ProcedureDesignator &pd, ActualArguments &&arguments, bool isSubroutine, bool mightBeStructureConstructor) -> std::optional { - return common::visit( - common::visitors{ - [&](const parser::Name &name) { - return GetCalleeAndArguments(name, std::move(arguments), - isSubroutine, mightBeStructureConstructor); - }, - [&](const parser::ProcComponentRef &pcr) { - 
return AnalyzeProcedureComponentRef( - pcr, std::move(arguments), isSubroutine); - }, - }, + return common::visit(common::visitors{ + [&](const parser::Name &name) { + return GetCalleeAndArguments(name, + std::move(arguments), isSubroutine, + mightBeStructureConstructor); + }, + [&](const parser::ProcComponentRef &pcr) { + return AnalyzeProcedureComponentRef( + pcr, std::move(arguments), isSubroutine); + }, + }, pd.u); } @@ -3417,26 +3417,26 @@ void ArgumentAnalyzer::Analyze( // be detected and represented (they're not expressions). // TODO: C1534: Don't allow a "restricted" specific intrinsic to be passed. std::optional actual; - common::visit( - common::visitors{ - [&](const common::Indirection &x) { - actual = AnalyzeExpr(x.value()); - SetArgSourceLocation(actual, x.value().source); - }, - [&](const parser::AltReturnSpec &label) { - if (!isSubroutine) { - context_.Say("alternate return specification may not appear on" - " function reference"_err_en_US); - } - actual = ActualArgument(label.v); - }, - [&](const parser::ActualArg::PercentRef &) { - context_.Say("%REF() intrinsic for arguments"_todo_en_US); - }, - [&](const parser::ActualArg::PercentVal &) { - context_.Say("%VAL() intrinsic for arguments"_todo_en_US); - }, - }, + common::visit(common::visitors{ + [&](const common::Indirection &x) { + actual = AnalyzeExpr(x.value()); + SetArgSourceLocation(actual, x.value().source); + }, + [&](const parser::AltReturnSpec &label) { + if (!isSubroutine) { + context_.Say( + "alternate return specification may not appear on" + " function reference"_err_en_US); + } + actual = ActualArgument(label.v); + }, + [&](const parser::ActualArg::PercentRef &) { + context_.Say("%REF() intrinsic for arguments"_todo_en_US); + }, + [&](const parser::ActualArg::PercentVal &) { + context_.Say("%VAL() intrinsic for arguments"_todo_en_US); + }, + }, std::get(arg.t).u); if (actual) { if (const auto &argKW{std::get>(arg.t)}) { diff --git a/flang/lib/Semantics/mod-file.cpp 
b/flang/lib/Semantics/mod-file.cpp index 422c8735eab3a..3659ead0a568d 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -687,7 +687,7 @@ void ModFileWriter::PutProcEntity(llvm::raw_ostream &os, const Symbol &symbol) { return; } const auto &details{symbol.get()}; - const ProcInterface &interface{details.interface()}; + const ProcInterface &interface { details.interface() }; Attrs attrs{symbol.attrs()}; if (details.passName()) { attrs.reset(Attr::PASS); diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index fe7942bbb7d79..4fe6ee4bd0076 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -715,7 +715,8 @@ bool GenericKind::Is(GenericKind::OtherKind x) const { return y && *y == x; } -bool SymbolOffsetCompare::operator()(const SymbolRef &x, const SymbolRef &y) const { +bool SymbolOffsetCompare::operator()( + const SymbolRef &x, const SymbolRef &y) const { const Symbol *xCommon{FindCommonBlockContaining(*x)}; const Symbol *yCommon{FindCommonBlockContaining(*y)}; if (xCommon) { diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 6bf2a574fe3fa..7484993d2393e 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -456,9 +456,7 @@ const Symbol *FindInterface(const Symbol &symbol) { return common::visit( common::visitors{ [](const ProcEntityDetails &details) { - const Symbol *interface { - details.interface().symbol() - }; + const Symbol *interface { details.interface().symbol() }; return interface ? FindInterface(*interface) : nullptr; }, [](const ProcBindingDetails &details) { diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index d8dca531d9398..7601e1e4c87a4 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -1,5 +1,6 @@ # Test runner infrastructure for Flang. This configures the Flang test trees # for use by Lit, and delegates to LLVM's lit test handlers. 
+add_subdirectory(lib) llvm_canonicalize_cmake_booleans( FLANG_BUILD_EXAMPLES diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index d2dc82ea1b526..3bce2a57caa1a 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -22,6 +22,7 @@ ! CHECK-NEXT: -E Only run the preprocessor ! CHECK-NEXT: -falternative-parameter-statement ! CHECK-NEXT: Enable the old style PARAMETER statement +! CHECK-NEXT: -fapprox-func Allow certain math function calls to be replaced with an approximately equivalent calculation ! CHECK-NEXT: -fbackslash Specify that backslash in string introduces an escape character ! CHECK-NEXT: -fcolor-diagnostics Enable colors in diagnostics ! CHECK-NEXT: -fconvert= Set endian conversion of data for unformatted files @@ -44,8 +45,10 @@ ! CHECK-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE ! CHECK-NEXT: -fno-color-diagnostics Disable colors in diagnostics ! CHECK-NEXT: -fno-integrated-as Disable the integrated assembler +! CHECK-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! CHECK-NEXT: -fopenacc Enable OpenACC ! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. +! CHECK-NEXT: -freciprocal-math Allow division operations to be reassociated ! CHECK-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! CHECK-NEXT: -help Display available options diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 3ab509c7129e9..8d24deee0b1ad 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -22,6 +22,7 @@ ! HELP-NEXT: -E Only run the preprocessor ! HELP-NEXT: -falternative-parameter-statement ! HELP-NEXT: Enable the old style PARAMETER statement +! 
HELP-NEXT: -fapprox-func Allow certain math function calls to be replaced with an approximately equivalent calculation ! HELP-NEXT: -fbackslash Specify that backslash in string introduces an escape character ! HELP-NEXT: -fcolor-diagnostics Enable colors in diagnostics ! HELP-NEXT: -fconvert= Set endian conversion of data for unformatted files @@ -42,8 +43,10 @@ ! HELP-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE ! HELP-NEXT: -fno-color-diagnostics Disable colors in diagnostics ! HELP-NEXT: -fno-integrated-as Disable the integrated assembler +! HELP-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! HELP-NEXT: -fopenacc Enable OpenACC ! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. +! HELP-NEXT: -freciprocal-math Allow division operations to be reassociated ! HELP-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-NEXT: -help Display available options @@ -79,6 +82,7 @@ ! HELP-FC1-NEXT: -E Only run the preprocessor ! HELP-FC1-NEXT: -falternative-parameter-statement ! HELP-FC1-NEXT: Enable the old style PARAMETER statement +! HELP-FC1-NEXT: -fapprox-func Allow certain math function calls to be replaced with an approximately equivalent calculation ! HELP-FC1-NEXT: -fbackslash Specify that backslash in string introduces an escape character ! HELP-FC1-NEXT: -fcolor-diagnostics Enable colors in diagnostics ! HELP-FC1-NEXT: -fconvert= Set endian conversion of data for unformatted files @@ -122,18 +126,23 @@ ! HELP-FC1-NEXT: -fno-automatic Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE ! HELP-FC1-NEXT: -fno-debug-pass-manager Disables debug printing for the new pass manager ! HELP-FC1-NEXT: -fno-reformat Dump the cooked character stream in -E mode +! 
HELP-FC1-NEXT: -fno-signed-zeros Allow optimizations that ignore the sign of floating point zeros ! HELP-FC1-NEXT: -fopenacc Enable OpenACC ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. +! HELP-FC1-NEXT: -freciprocal-math Allow division operations to be reassociated ! HELP-FC1-NEXT: -fsyntax-only Run the preprocessor, parser and semantic analysis stages ! HELP-FC1-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. ! HELP-FC1-NEXT: -help Display available options ! HELP-FC1-NEXT: -init-only Only execute frontend initialization ! HELP-FC1-NEXT: -I Add directory to the end of the list of include search paths ! HELP-FC1-NEXT: -load Load the named plugin (dynamic shared object) +! HELP-FC1-NEXT: -menable-no-infs Allow optimization to assume there are no infinities. +! HELP-FC1-NEXT: -menable-no-nans Allow optimization to assume there are no NaNs. ! HELP-FC1-NEXT: -mllvm Additional arguments to forward to LLVM's option processing ! HELP-FC1-NEXT: -mmlir Additional arguments to forward to MLIR's option processing ! HELP-FC1-NEXT: -module-dir Put MODULE files in ! HELP-FC1-NEXT: -module-suffix Use as the suffix for module files (the default value is `.mod`) +! HELP-FC1-NEXT: -mreassociate Allow reassociation transformations for floating-point instructions ! HELP-FC1-NEXT: -mrelocation-model ! HELP-FC1-NEXT: The relocation model to use ! HELP-FC1-NEXT: -nocpp Disable predefined and command line preprocessor macros diff --git a/flang/test/Driver/emit-llvm.f90 b/flang/test/Driver/emit-llvm.f90 index 8e864421529eb..32a5a044f2b08 100644 --- a/flang/test/Driver/emit-llvm.f90 +++ b/flang/test/Driver/emit-llvm.f90 @@ -6,6 +6,7 @@ ! RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck %s ! CHECK: ; ModuleID = 'FIRModule' +! CHECK: target datalayout = ! CHECK: define void @_QQmain() ! CHECK-NEXT: ret void ! 
CHECK-NEXT: } diff --git a/flang/test/Driver/emit-mlir.f90 b/flang/test/Driver/emit-mlir.f90 index 9391195c94339..191ee13396ef9 100644 --- a/flang/test/Driver/emit-mlir.f90 +++ b/flang/test/Driver/emit-mlir.f90 @@ -10,6 +10,8 @@ ! RUN: %flang_fc1 -emit-mlir emit-mlir.f90 && ls emit-mlir.mlir ! CHECK: module attributes { +! CHECK-SAME: dlti.dl_spec = +! CHECK-SAME: llvm.data_layout = ! CHECK-LABEL: func @_QQmain() { ! CHECK-NEXT: return ! CHECK-NEXT: } diff --git a/flang/test/Driver/flang_fp_opts.f90 b/flang/test/Driver/flang_fp_opts.f90 index 34987f4b0c438..0dc31f6f7649e 100644 --- a/flang/test/Driver/flang_fp_opts.f90 +++ b/flang/test/Driver/flang_fp_opts.f90 @@ -1,4 +1,18 @@ ! Test for handling of floating point options within the frontend driver -! RUN: %flang_fc1 -ffp-contract=fast %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 \ +! RUN: -ffp-contract=fast \ +! RUN: -menable-no-infs \ +! RUN: -menable-no-nans \ +! RUN: -fapprox-func \ +! RUN: -fno-signed-zeros \ +! RUN: -mreassociate \ +! RUN: -freciprocal-math \ +! RUN: %s 2>&1 | FileCheck %s ! CHECK: ffp-contract= is not currently implemented +! CHECK: menable-no-infs is not currently implemented +! CHECK: menable-no-nans is not currently implemented +! CHECK: fapprox-func is not currently implemented +! CHECK: fno-signed-zeros is not currently implemented +! CHECK: mreassociate is not currently implemented +! CHECK: freciprocal-math is not currently implemented diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 index b956940fd7d29..9d1d7cb8d3c88 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -9,6 +9,12 @@ ! RUN: -flarge-sizes \ ! RUN: -fconvert=little-endian \ ! RUN: -ffp-contract=fast \ +! RUN: -fno-honor-infinities \ +! RUN: -fno-honor-nans \ +! RUN: -fapprox-func \ +! RUN: -fno-signed-zeros \ +! RUN: -fassociative-math \ +! RUN: -freciprocal-math \ ! RUN: -mllvm -print-before-all\ ! RUN: -P \ ! 
RUN: | FileCheck %s @@ -20,5 +26,11 @@ ! CHECK: "-fdefault-real-8" ! CHECK: "-flarge-sizes" ! CHECK: "-ffp-contract=fast" +! CHECK: "-menable-no-infs" +! CHECK: "-menable-no-nans" +! CHECK: "-fapprox-func" +! CHECK: "-fno-signed-zeros" +! CHECK: "-mreassociate" +! CHECK: "-freciprocal-math" ! CHECK: "-fconvert=little-endian" ! CHECK: "-mllvm" "-print-before-all" diff --git a/flang/test/Driver/pic-flags.f90 b/flang/test/Driver/pic-flags.f90 index 2f4842f72cadf..fb6ab701c3820 100644 --- a/flang/test/Driver/pic-flags.f90 +++ b/flang/test/Driver/pic-flags.f90 @@ -1,3 +1,4 @@ +! REQUIRES: aarch64-registered-target && x86-registered-target && arm-registered-target ! RUN: %flang -v -S -emit-llvm -o - %s --target=aarch64-linux-gnu -fno-pie 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-STATIC,CHECK-STATIC-IR ! RUN: %flang -v -S -emit-llvm -o - %s --target=aarch64-linux-gnu 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PIE-LEVEL2,CHECK-PIE-LEVEL2-IR @@ -14,7 +15,6 @@ ! RUN: %flang -v -### -o - %s --target=arm-none-eabi -frwpi 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-RWPI ! RUN: %flang -v -### -o - %s --target=arm-none-eabi -fropi -frwpi 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-ROPI-RWPI - ! 
CHECK: -fc1 diff --git a/flang/test/Fir/abstract-results.fir b/flang/test/Fir/abstract-results.fir index 580f7c6d22e13..92d803e4994ba 100644 --- a/flang/test/Fir/abstract-results.fir +++ b/flang/test/Fir/abstract-results.fir @@ -202,6 +202,17 @@ func.func @call_chararrayfunc() { // FUNC-BOX-NOT: fir.save_result } +func.func private @rettcptr() -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> attributes {fir.bindc_name = "rettcptr"} + +// FUNC-REF-LABEL: func @_QPtest_return_cptr() { +// FUNC-BOX-LABEL: func @_QPtest_return_cptr() { +func.func @_QPtest_return_cptr() { + // FUNC-REF: [[VAL:.*]] = fir.call @rettcptr() : () -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + // FUNC-BOX: [[VAL:.*]] = fir.call @rettcptr() : () -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + %1 = fir.call @rettcptr() : () -> !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> + return +} + // ------------------------ Test fir.address_of rewrite ------------------------ func.func private @takesfuncarray((i32) -> !fir.array) diff --git a/flang/test/Fir/affine-promotion.fir b/flang/test/Fir/affine-promotion.fir index 4879e51a44512..aae35c6ef5659 100644 --- a/flang/test/Fir/affine-promotion.fir +++ b/flang/test/Fir/affine-promotion.fir @@ -50,21 +50,21 @@ func.func @loop_with_load_and_store(%a1: !arr_d1, %a2: !arr_d1, %a3: !arr_d1) { // CHECK: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK: %[[VAL_4:.*]] = arith.constant 100 : index // CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]] : (index) -> !fir.shape<1> -// CHECK: %[[VAL_6:.*]] = affine.apply #map(){{\[}}%[[VAL_3]], %[[VAL_4]]] +// CHECK: %[[VAL_6:.*]] = affine.apply #{{.*}}(){{\[}}%[[VAL_3]], %[[VAL_4]]] // CHECK: %[[VAL_7:.*]] = fir.alloca !fir.array, %[[VAL_6]] // CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_0]] : (!fir.ref>) -> memref // CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>) -> memref // CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_7]] : (!fir.ref>) 
-> memref -// CHECK: affine.for %[[VAL_11:.*]] = %[[VAL_3]] to #map1(){{\[}}%[[VAL_4]]] { -// CHECK: %[[VAL_12:.*]] = affine.apply #map2(%[[VAL_11]]){{\[}}%[[VAL_3]], %[[VAL_4]], %[[VAL_3]]] +// CHECK: affine.for %[[VAL_11:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_4]]] { +// CHECK: %[[VAL_12:.*]] = affine.apply #{{.*}}(%[[VAL_11]]){{\[}}%[[VAL_3]], %[[VAL_4]], %[[VAL_3]]] // CHECK: %[[VAL_13:.*]] = affine.load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = affine.load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_13]], %[[VAL_14]] : f32 // CHECK: affine.store %[[VAL_15]], %[[VAL_10]]{{\[}}%[[VAL_12]]] : memref // CHECK: } // CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>) -> memref -// CHECK: affine.for %[[VAL_17:.*]] = %[[VAL_3]] to #map1(){{\[}}%[[VAL_4]]] { -// CHECK: %[[VAL_18:.*]] = affine.apply #map2(%[[VAL_17]]){{\[}}%[[VAL_3]], %[[VAL_4]], %[[VAL_3]]] +// CHECK: affine.for %[[VAL_17:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_4]]] { +// CHECK: %[[VAL_18:.*]] = affine.apply #{{.*}}(%[[VAL_17]]){{\[}}%[[VAL_3]], %[[VAL_4]], %[[VAL_3]]] // CHECK: %[[VAL_19:.*]] = affine.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_20:.*]] = affine.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_21:.*]] = arith.mulf %[[VAL_19]], %[[VAL_20]] : f32 @@ -114,18 +114,18 @@ func.func @loop_with_if(%a: !arr_d1, %v: f32) { // CHECK: %[[VAL_5:.*]] = arith.constant 100 : index // CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1> // CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_0]] : (!fir.ref>) -> memref -// CHECK: affine.for %[[VAL_8:.*]] = %[[VAL_3]] to #map(){{\[}}%[[VAL_5]]] { -// CHECK: %[[VAL_9:.*]] = affine.apply #map1(%[[VAL_8]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] +// CHECK: affine.for %[[VAL_8:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_5]]] { +// CHECK: %[[VAL_9:.*]] = affine.apply #{{.*}}(%[[VAL_8]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] // CHECK: 
affine.store %[[VAL_1]], %[[VAL_7]]{{\[}}%[[VAL_9]]] : memref // CHECK: } -// CHECK: affine.for %[[VAL_10:.*]] = %[[VAL_3]] to #map(){{\[}}%[[VAL_5]]] { -// CHECK: %[[VAL_11:.*]] = affine.apply #map1(%[[VAL_10]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] +// CHECK: affine.for %[[VAL_10:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_5]]] { +// CHECK: %[[VAL_11:.*]] = affine.apply #{{.*}}(%[[VAL_10]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] // CHECK: affine.store %[[VAL_1]], %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref // CHECK: } -// CHECK: affine.for %[[VAL_12:.*]] = %[[VAL_3]] to #map(){{\[}}%[[VAL_5]]] { +// CHECK: affine.for %[[VAL_12:.*]] = %[[VAL_3]] to #{{.*}}(){{\[}}%[[VAL_5]]] { // CHECK: %[[VAL_13:.*]] = arith.subi %[[VAL_12]], %[[VAL_4]] : index // CHECK: affine.if #set(%[[VAL_12]]) { -// CHECK: %[[VAL_14:.*]] = affine.apply #map1(%[[VAL_12]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] +// CHECK: %[[VAL_14:.*]] = affine.apply #{{.*}}(%[[VAL_12]]){{\[}}%[[VAL_3]], %[[VAL_5]], %[[VAL_3]]] // CHECK: affine.store %[[VAL_1]], %[[VAL_7]]{{\[}}%[[VAL_14]]] : memref // CHECK: } // CHECK: } diff --git a/flang/test/Fir/boxchar.fir b/flang/test/Fir/boxchar.fir index 06d66202ff894..4c5fdec2184ed 100644 --- a/flang/test/Fir/boxchar.fir +++ b/flang/test/Fir/boxchar.fir @@ -1,5 +1,5 @@ -// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s -// RUN: %flang_fc1 -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s +// RUN: tco %s | FileCheck %s +// RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck %s // Test of building and passing boxchar. 
diff --git a/flang/test/Fir/cg-ops.fir b/flang/test/Fir/cg-ops.fir index 6fcaa5c3d6066..c8c666a62a3d2 100644 --- a/flang/test/Fir/cg-ops.fir +++ b/flang/test/Fir/cg-ops.fir @@ -1,4 +1,4 @@ -// RUN: fir-opt --split-input-file --pass-pipeline="cg-rewrite,cse" %s | FileCheck %s +// RUN: fir-opt --split-input-file --pass-pipeline="builtin.module(cg-rewrite,cse)" %s | FileCheck %s // CHECK-LABEL: func @codegen( // CHECK-SAME: %[[arg:.*]]: !fir diff --git a/flang/test/Fir/convert-to-llvm-invalid.fir b/flang/test/Fir/convert-to-llvm-invalid.fir index 7cba7fc55b8e3..bdc2525876ac2 100644 --- a/flang/test/Fir/convert-to-llvm-invalid.fir +++ b/flang/test/Fir/convert-to-llvm-invalid.fir @@ -71,14 +71,14 @@ func.func @shape_shift_not_dead(%arg0: !fir.ref>, %i: index, // Test `fir.select_type` conversion to llvm. // Should have been converted. -func.func @bar_select_type(%arg : !fir.box>) -> i32 { +func.func @bar_select_type(%arg : !fir.class>) -> i32 { %0 = arith.constant 1 : i32 %2 = arith.constant 3 : i32 // expected-error@+2{{fir.select_type should have already been converted}} // expected-error@+1{{failed to legalize operation 'fir.select_type'}} - fir.select_type %arg : !fir.box> [ - #fir.instance>,^bb1(%0:i32), - #fir.instance>,^bb2(%2:i32), + fir.select_type %arg : !fir.class> [ + #fir.type_is>,^bb1(%0:i32), + #fir.type_is>,^bb2(%2:i32), unit,^bb5 ] ^bb1(%a : i32) : return %a : i32 diff --git a/flang/test/Fir/dispatch.f90 b/flang/test/Fir/dispatch.f90 index e3e75f20ea45d..7b462df81c52f 100644 --- a/flang/test/Fir/dispatch.f90 +++ b/flang/test/Fir/dispatch.f90 @@ -120,6 +120,21 @@ subroutine display_class(p) call p%proc_pass(1) end subroutine + subroutine no_pass_array(a) + class(p1) :: a(:) + call a(1)%proc_nopass() + end subroutine + + subroutine no_pass_array_allocatable(a) + class(p1), allocatable :: a(:) + call a(1)%proc_nopass() + end subroutine + + subroutine no_pass_array_pointer(a) + class(p1), allocatable :: a(:) + call a(1)%proc_nopass() + end subroutine + end 
module program test_type_to_class @@ -232,6 +247,9 @@ program test_type_to_class ! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr ! CHECK: call void %[[FUNC_PTR]](ptr %[[INT32]], ptr %[[CLASS]]) +! CHECK-LABEL: _QMdispatch1Pno_pass_array +! CHECK-LABEL: _QMdispatch1Pno_pass_array_allocatable +! CHECK-LABEL: _QMdispatch1Pno_pass_array_pointer ! Check the layout of the binding table. This is easier to do in FIR than in ! LLVM IR. diff --git a/flang/test/Fir/fir-ops.fir b/flang/test/Fir/fir-ops.fir index 5052a06d90bcd..486c7ee809910 100644 --- a/flang/test/Fir/fir-ops.fir +++ b/flang/test/Fir/fir-ops.fir @@ -322,8 +322,8 @@ func.func @bar_select_rank(%arg : i32, %arg2 : i32) -> i32 { } // CHECK-LABEL: func @bar_select_type( -// CHECK-SAME: [[VAL_101:%.*]]: !fir.box}>>) -> i32 { -func.func @bar_select_type(%arg : !fir.box}>>) -> i32 { +// CHECK-SAME: [[VAL_101:%.*]]: !fir.class}>>) -> i32 { +func.func @bar_select_type(%arg : !fir.class}>>) -> i32 { // CHECK: [[VAL_102:%.*]] = arith.constant 1 : i32 // CHECK: [[VAL_103:%.*]] = arith.constant 2 : i32 @@ -334,8 +334,8 @@ func.func @bar_select_type(%arg : !fir.box}>> [#fir.instance>, ^bb1([[VAL_102]] : i32), #fir.instance>, ^bb2([[VAL_104]] : i32), #fir.subsumed>, ^bb3([[VAL_104]] : i32), #fir.instance>, ^bb4([[VAL_103]] : i32), unit, ^bb5] - fir.select_type %arg : !fir.box}>> [ #fir.instance>,^bb1(%0:i32), #fir.instance>,^bb2(%2:i32), #fir.subsumed>,^bb3(%2:i32), #fir.instance>,^bb4(%1:i32), unit,^bb5 ] +// CHECK: fir.select_type [[VAL_101]] : !fir.class}>> [#fir.type_is>, ^bb1([[VAL_102]] : i32), #fir.type_is>, ^bb2([[VAL_104]] : i32), #fir.class_is>, ^bb3([[VAL_104]] : i32), #fir.type_is>, ^bb4([[VAL_103]] : i32), unit, ^bb5] + fir.select_type %arg : !fir.class}>> [ #fir.type_is>,^bb1(%0:i32), #fir.type_is>,^bb2(%2:i32), #fir.class_is>,^bb3(%2:i32), #fir.type_is>,^bb4(%1:i32), unit,^bb5 ] // CHECK: ^bb1([[VAL_106:%.*]]: i32): // CHECK: return [[VAL_106]] : i32 diff --git a/flang/test/Fir/inline.fir 
b/flang/test/Fir/inline.fir index 48f10c1e85198..c2ed2bf422ead 100644 --- a/flang/test/Fir/inline.fir +++ b/flang/test/Fir/inline.fir @@ -1,5 +1,5 @@ -// RUN: tco --target=x86_64-unknown-linux-gnu --inline-all %s -o - | FileCheck %s -// RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -mmlir --inline-all -emit-llvm %s -o - | FileCheck %s +// RUN: tco --inline-all %s -o - | FileCheck %s +// RUN: %flang_fc1 -mmlir --inline-all -emit-llvm %s -o - | FileCheck %s // CHECK-LABEL: @add func.func @add(%a : i32, %b : i32) -> i32 { diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir index 5d0ac39184211..fdb4249c4ad81 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -928,3 +928,22 @@ func.func @bad_array_declare_unlimited_polymorphic_boxaddr(%arg0: !fir.ref>>>, !fir.shift<2>) -> !fir.ref>>> return } + +// ----- + +func.func @invalid_selector(%arg : !fir.box>) -> i32 { + %0 = arith.constant 1 : i32 + %2 = arith.constant 3 : i32 + // expected-error@+1{{'fir.select_type' op selector must be polymorphic}} + fir.select_type %arg : !fir.box> [ + #fir.type_is>,^bb1(%0:i32), + #fir.type_is>,^bb2(%2:i32), + unit,^bb5 ] +^bb1(%a : i32) : + return %a : i32 +^bb2(%b : i32) : + return %b : i32 +^bb5 : + %zero = arith.constant 0 : i32 + return %zero : i32 +} diff --git a/flang/test/Fir/target.fir b/flang/test/Fir/target.fir index 831c75379aa07..f6cf0587fcae9 100644 --- a/flang/test/Fir/target.fir +++ b/flang/test/Fir/target.fir @@ -48,13 +48,13 @@ func.func @gen8() -> !fir.complex<8> { return %5 : !fir.complex<8> } -// I32: declare void @sink4(ptr) +// I32: declare void @sink4(ptr byval({ float, float }) align 4) // X64: declare void @sink4(<2 x float>) // AARCH64: declare void @sink4([2 x float]) // PPC: declare void @sink4(float, float) func.func private @sink4(!fir.complex<4>) -> () -// I32: declare void @sink8(ptr) +// I32: declare void @sink8(ptr byval({ double, double }) align 4) // X64: declare void @sink8(double, double) // AARCH64: 
declare void @sink8([2 x double]) // PPC: declare void @sink8(double, double) diff --git a/flang/test/Intrinsics/math-codegen.fir b/flang/test/Intrinsics/math-codegen.fir index 2c658d4c1b4d6..0af896adf3226 100644 --- a/flang/test/Intrinsics/math-codegen.fir +++ b/flang/test/Intrinsics/math-codegen.fir @@ -4,13 +4,13 @@ //--- abs_fast.fir // RUN: fir-opt %t/abs_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/abs_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 // CHECK: @_QPtest_real16 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f128) -> f128 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f128) -> f128 // CHECK: @_QPtest_complex4 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 @@ -76,13 +76,13 @@ func.func private @hypot(f64, f64) -> f64 //--- abs_relaxed.fir // RUN: fir-opt %t/abs_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/abs_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f64) -> f64 // CHECK: @_QPtest_real16 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.fabs"({{%[A-Za-z0-9._]+}}) : (f128) -> f128 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.fabs({{%[A-Za-z0-9._]+}}) : (f128) -> f128 // CHECK: @_QPtest_complex4 
// CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @hypotf({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 @@ -304,15 +304,15 @@ func.func private @llvm.trunc.f64(f64) -> f64 //--- anint_fast.fir // RUN: fir-opt %t/anint_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/anint_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} %1 = fir.load %arg0 : !fir.ref - %2 = "llvm.intr.round"(%1) : (f32) -> f32 + %2 = llvm.intr.round(%1) : (f32) -> f32 fir.store %2 to %0 : !fir.ref %3 = fir.load %0 : !fir.ref return %3 : f32 @@ -320,7 +320,7 @@ func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { %0 = fir.alloca f64 {bindc_name = "test_real8", uniq_name = "_QFtest_real8Etest_real8"} %1 = fir.load %arg0 : !fir.ref - %2 = "llvm.intr.round"(%1) : (f64) -> f64 + %2 = llvm.intr.round(%1) : (f64) -> f64 fir.store %2 to %0 : !fir.ref %3 = fir.load %0 : !fir.ref return %3 : f64 @@ -329,15 +329,15 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- anint_relaxed.fir // RUN: fir-opt %t/anint_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/anint_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: 
{{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} %1 = fir.load %arg0 : !fir.ref - %2 = "llvm.intr.round"(%1) : (f32) -> f32 + %2 = llvm.intr.round(%1) : (f32) -> f32 fir.store %2 to %0 : !fir.ref %3 = fir.load %0 : !fir.ref return %3 : f32 @@ -345,7 +345,7 @@ func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { %0 = fir.alloca f64 {bindc_name = "test_real8", uniq_name = "_QFtest_real8Etest_real8"} %1 = fir.load %arg0 : !fir.ref - %2 = "llvm.intr.round"(%1) : (f64) -> f64 + %2 = llvm.intr.round(%1) : (f64) -> f64 fir.store %2 to %0 : !fir.ref %3 = fir.load %0 : !fir.ref return %3 : f64 @@ -541,10 +541,10 @@ func.func private @atan2(f64, f64) -> f64 //--- ceiling_fast.fir // RUN: fir-opt %t/ceiling_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/ceiling_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.ceil({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -570,10 +570,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- ceiling_relaxed.fir // RUN: fir-opt %t/ceiling_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/ceiling_relaxed.fir // CHECK: @_QPtest_real4 -// 
CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.ceil({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.ceil"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.ceil({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -630,10 +630,10 @@ func.func private @ceil(f64) -> f64 //--- cos_fast.fir // RUN: fir-opt %t/cos_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/cos_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.cos({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -655,10 +655,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- cos_relaxed.fir // RUN: fir-opt %t/cos_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/cos_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.cos({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.cos"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.cos({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = 
"test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -865,10 +865,10 @@ func.func private @erf(f64) -> f64 //--- exp_fast.fir // RUN: fir-opt %t/exp_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/exp_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.exp({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -890,10 +890,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- exp_relaxed.fir // RUN: fir-opt %t/exp_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/exp_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.exp({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.exp"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.exp({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -942,10 +942,10 @@ func.func private @exp(f64) -> f64 //--- floor_fast.fir // RUN: fir-opt %t/floor_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/floor_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.floor({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: 
{{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -971,10 +971,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- floor_relaxed.fir // RUN: fir-opt %t/floor_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/floor_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.floor({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.floor"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.floor({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1031,10 +1031,10 @@ func.func private @floor(f64) -> f64 //--- log_fast.fir // RUN: fir-opt %t/log_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/log_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1056,10 +1056,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- log_relaxed.fir // RUN: fir-opt 
%t/log_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/log_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1108,10 +1108,10 @@ func.func private @log(f64) -> f64 //--- log10_fast.fir // RUN: fir-opt %t/log10_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/log10_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log10({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1133,10 +1133,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- log10_relaxed.fir // RUN: fir-opt %t/log10_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/log10_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.log10({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.log10"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = 
llvm.intr.log10({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1310,13 +1310,13 @@ func.func private @llvm.lround.i64.f64(f64) -> i64 // CHECK: @_QPtest_real4 // CHECK: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 // CHECK: @_QPtest_real8 // CHECK: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref {fir.bindc_name = "s"}, %arg3: !fir.ref {fir.bindc_name = "i"}) -> f32 { @@ -1363,13 +1363,13 @@ func.func private @llvm.powi.f64.i32(f64, i32) -> f64 // CHECK: @_QPtest_real4 // CHECK: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f32, i32) -> f32 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, 
f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f32.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, i32) -> f32 // CHECK: @_QPtest_real8 // CHECK: [[STOI:%[A-Za-z0-9._]+]] = llvm.sext {{%[A-Za-z0-9._]+}} : i16 to i32 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, [[STOI]]) : (f64, i32) -> f64 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.pow"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.pow({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 // CHECK: {{%[A-Za-z0-9._]+}} = llvm.call @llvm.powi.f64.i32({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, i32) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}, %arg2: !fir.ref {fir.bindc_name = "s"}, %arg3: !fir.ref {fir.bindc_name = "i"}) -> f32 { @@ -1482,16 +1482,16 @@ func.func @_QPtest_int4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir //--- sign_fast.fir // RUN: fir-opt %t/sign_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/sign_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 // CHECK: @_QPtest_real10 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f80, f80) -> f80 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f80, f80) -> f80 // 
CHECK: @_QPtest_real16 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f128, f128) -> f128 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f128, f128) -> f128 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1533,16 +1533,16 @@ func.func @_QPtest_real16(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: ! //--- sign_relaxed.fir // RUN: fir-opt %t/sign_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/sign_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f32, f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f64, f64) -> f64 // CHECK: @_QPtest_real10 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f80, f80) -> f80 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f80, f80) -> f80 // CHECK: @_QPtest_real16 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.copysign"({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f128, f128) -> f128 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.copysign({{%[A-Za-z0-9._]+}}, {{%[A-Za-z0-9._]+}}) : (f128, f128) -> f128 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}, %arg1: !fir.ref {fir.bindc_name = "y"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1639,10 +1639,10 @@ func.func private @llvm.copysign.f128(f128, 
f128) -> f128 //--- sin_fast.fir // RUN: fir-opt %t/sin_fast.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/sin_fast.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.sin({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} @@ -1664,10 +1664,10 @@ func.func @_QPtest_real8(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f64 { //--- sin_relaxed.fir // RUN: fir-opt %t/sin_relaxed.fir --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu" | FileCheck %t/sin_relaxed.fir // CHECK: @_QPtest_real4 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.sin({{%[A-Za-z0-9._]+}}) : (f32) -> f32 // CHECK: @_QPtest_real8 -// CHECK: {{%[A-Za-z0-9._]+}} = "llvm.intr.sin"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +// CHECK: {{%[A-Za-z0-9._]+}} = llvm.intr.sin({{%[A-Za-z0-9._]+}}) : (f64) -> f64 func.func @_QPtest_real4(%arg0: !fir.ref {fir.bindc_name = "x"}) -> f32 { %0 = fir.alloca f32 {bindc_name = "test_real4", uniq_name = "_QFtest_real4Etest_real4"} diff --git a/flang/test/Lower/Intrinsics/anint.f90 b/flang/test/Lower/Intrinsics/anint.f90 index 03b376e2605c7..fe479a07681ee 100644 --- a/flang/test/Lower/Intrinsics/anint.f90 +++ b/flang/test/Lower/Intrinsics/anint.f90 @@ -4,7 +4,7 @@ ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "a"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: %[[VAL_3:.*]] = "llvm.intr.round"(%[[VAL_2]]) : (f32) -> f32 +! 
CHECK: %[[VAL_3:.*]] = llvm.intr.round(%[[VAL_2]]) : (f32) -> f32 ! CHECK: fir.store %[[VAL_3]] to %[[VAL_1]] : !fir.ref ! CHECK: return ! CHECK: } @@ -18,7 +18,7 @@ subroutine anint_test(a, b) ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "a"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: %[[VAL_3:.*]] = "llvm.intr.round"(%[[VAL_2]]) : (f64) -> f64 +! CHECK: %[[VAL_3:.*]] = llvm.intr.round(%[[VAL_2]]) : (f64) -> f64 ! CHECK: fir.store %[[VAL_3]] to %[[VAL_1]] : !fir.ref ! CHECK: return ! CHECK: } @@ -32,7 +32,7 @@ subroutine anint_test_real8(a, b) ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "a"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref -! CHECK: %[[VAL_3:.*]] = "llvm.intr.round"(%[[VAL_2]]) : (f80) -> f80 +! CHECK: %[[VAL_3:.*]] = llvm.intr.round(%[[VAL_2]]) : (f80) -> f80 ! CHECK: fir.store %[[VAL_3]] to %[[VAL_1]] : !fir.ref ! CHECK: return ! CHECK: } diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index 6a0fa45234fde..c82df40045fc2 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -17,6 +17,10 @@ module poly procedure :: proc2 => proc2_p2 end type + type with_alloc + class(p1), pointer :: element + end type + contains subroutine proc1_p1() print*, 'call proc1_p1' @@ -348,8 +352,31 @@ subroutine test_deallocate() allocate(p) deallocate(p) end subroutine + + subroutine test_type_with_polymorphic_pointer_component() + type(with_alloc), pointer :: a + allocate(a) + allocate(a%element) + end subroutine end module +! CHECK-LABEL: func.func @_QMpolyPtest_type_with_polymorphic_pointer_component() +! CHECK: %[[TYPE_PTR:.*]] = fir.alloca !fir.ptr>>}>> {uniq_name = "_QMpolyFtest_type_with_polymorphic_pointer_componentEa.addr"} +! 
CHECK: %[[TYPE_PTR_LOAD:.*]] = fir.load %[[TYPE_PTR]] : !fir.ref>>}>>> +! CHECK: %[[ELEMENT:.*]] = fir.field_index element, !fir.type<_QMpolyTwith_alloc{element:!fir.class>>}> +! CHECK: %[[ELEMENT_DESC:.*]] = fir.coordinate_of %[[TYPE_PTR_LOAD]], %[[ELEMENT]] : (!fir.ptr>>}>>, !fir.field) -> !fir.ref>>> +! CHECK: %[[ZERO:.*]] = fir.zero_bits !fir.ptr> +! CHECK: %[[ZERO_DESC:.*]] = fir.embox %[[ZERO]] : (!fir.ptr>) -> !fir.class>> +! CHECK: fir.store %[[ZERO_DESC]] to %[[ELEMENT_DESC]] : !fir.ref>>> +! CHECK: %[[TYPE_DESC_P1:.*]] = fir.address_of(@_QMpolyE.dt.p1) : !fir.ref> +! CHECK: %[[ELEMENT_DESC_CAST:.*]] = fir.convert %[[ELEMENT_DESC]] : (!fir.ref>>>) -> !fir.ref> +! CHECK: %[[TYPE_DESC_P1_CAST:.*]] = fir.convert %[[TYPE_DESC_P1]] : (!fir.ref>) -> !fir.ref +! CHECK: %[[RANK:.*]] = arith.constant 0 : i32 +! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32 +! CHECK: %{{.*}} = fir.call @_FortranAPointerNullifyDerived(%[[ELEMENT_DESC_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[CORANK]]) : (!fir.ref>, !fir.ref, i32, i32) -> none +! CHECK: %[[ELEMENT_DESC_CAST:.*]] = fir.convert %[[ELEMENT_DESC]] : (!fir.ref>>>) -> !fir.ref> +! CHECK: %{{.*}} = fir.call @_FortranAPointerAllocate(%[[ELEMENT_DESC_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 + program test_alloc use poly diff --git a/flang/test/Lower/call-by-value.f90 b/flang/test/Lower/call-by-value.f90 index 717da1afd99e5..b9f9dc1a24eff 100644 --- a/flang/test/Lower/call-by-value.f90 +++ b/flang/test/Lower/call-by-value.f90 @@ -73,3 +73,20 @@ subroutine test_char_value(x) bind(c) character(1), value :: x call internal_call4(x) end + +! CHECK-LABEL: func.func @_QPtest_cptr_value( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_1:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> +! CHECK: %[[VAL_2:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}> +! 
CHECK: %[[VAL_3:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_2]] : (!fir.ref>, !fir.field) -> !fir.ref +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.ref) -> i64 +! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref +! CHECK: fir.call @_QPinternal_call5(%[[VAL_1]]) : (!fir.ref>) -> () +! CHECK: return +! CHECK: } + +subroutine test_cptr_value(x) + use iso_c_binding + type(c_ptr), value :: x + call internal_call5(x) +end diff --git a/flang/test/Lower/ext-proc-as-actual-argument-1.f90 b/flang/test/Lower/ext-proc-as-actual-argument-1.f90 index e121a82a3e021..36751b82a1cad 100644 --- a/flang/test/Lower/ext-proc-as-actual-argument-1.f90 +++ b/flang/test/Lower/ext-proc-as-actual-argument-1.f90 @@ -13,7 +13,7 @@ ! CHECK: return ! CHECK-LABEL: func @_QPext_func( -! CEHCK: %[[ARG_0:.*]]: !fir.ref>, %[[ARG_1:.*]]: index) -> !fir.boxchar<1> { +! CHECK: %[[ARG_0:.*]]: !fir.ref>, %[[ARG_1:.*]]: index) -> !fir.boxchar<1> { program m external :: ext_func call sub(ext_func) diff --git a/flang/test/Lower/ext-proc-as-actual-argument-2.f90 b/flang/test/Lower/ext-proc-as-actual-argument-2.f90 index 8c04e8617f49e..5b6d0aad308a7 100644 --- a/flang/test/Lower/ext-proc-as-actual-argument-2.f90 +++ b/flang/test/Lower/ext-proc-as-actual-argument-2.f90 @@ -13,7 +13,7 @@ ! CHECK: return ! CHECK-LABEL: func @_QPext_func( -! CEHCK: %[[ARG_0:.*]]: !fir.ref>, %[[ARG_1:.*]]: index) -> !fir.boxchar<1> { +! CHECK: %[[ARG_0:.*]]: !fir.ref>, %[[ARG_1:.*]]: index) -> !fir.boxchar<1> { program m external :: ext_func call sub(ext_func) diff --git a/flang/test/Lower/fail_image.f90 b/flang/test/Lower/fail_image.f90 index 9da162faca248..f2b54e7c2d560 100644 --- a/flang/test/Lower/fail_image.f90 +++ b/flang/test/Lower/fail_image.f90 @@ -14,7 +14,7 @@ subroutine fail_image_test(fail) ! CHECK: ^[[BB2]]: ! CHECK-NEXT: br ^[[BB3:.*]] ! CHECK-NEXT: ^[[BB3]] -! CEHCK-NEXT: return +! CHECK-NEXT: return return end subroutine ! 
CHECK-LABEL: func private @_FortranAFailImageStatement() -> none attributes {fir.runtime} diff --git a/flang/test/Lower/fast-math-arithmetic.f90 b/flang/test/Lower/fast-math-arithmetic.f90 new file mode 100644 index 0000000000000..cc7a7dcf210e3 --- /dev/null +++ b/flang/test/Lower/fast-math-arithmetic.f90 @@ -0,0 +1,13 @@ +! RUN: %flang_fc1 -emit-fir -ffp-contract=fast %s -o - 2>&1 | FileCheck --check-prefixes=CONTRACT,ALL %s +! RUN: %flang_fc1 -emit-fir -menable-no-infs %s -o - 2>&1 | FileCheck --check-prefixes=NINF,ALL %s + +! ALL-LABEL: func.func @_QPtest +subroutine test(x) + real x +! CONTRACT: arith.mulf{{.*}}, {{.*}} fastmath<[[ATTRS:contract]]> : f32 +! NINF: arith.mulf{{.*}}, {{.*}} fastmath<[[ATTRS:ninf]]> : f32 +! ALL: arith.divf{{.*}}, {{.*}} fastmath<[[ATTRS]]> : f32 +! ALL: arith.addf{{.*}}, {{.*}} fastmath<[[ATTRS]]> : f32 +! ALL: arith.subf{{.*}}, {{.*}} fastmath<[[ATTRS]]> : f32 + x = x * x + x / x - x +end subroutine test diff --git a/flang/test/Lower/math-lowering.f90 b/flang/test/Lower/math-lowering.f90 index 7d9bf7c0c2a86..82dfaf4fc7301 100644 --- a/flang/test/Lower/math-lowering.f90 +++ b/flang/test/Lower/math-lowering.f90 @@ -109,8 +109,8 @@ function test_real4(x) end function ! ALL-LABEL: @_QPtest_real4 -! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 -! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! FAST: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f32) -> f32 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f32) -> f32 ! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f32({{%[A-Za-z0-9._]+}}) : (f32) -> f32 function test_real8(x) @@ -119,8 +119,8 @@ function test_real8(x) end function ! ALL-LABEL: @_QPtest_real8 -! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 -! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! 
FAST: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f64) -> f64 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f64) -> f64 ! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f64({{%[A-Za-z0-9._]+}}) : (f64) -> f64 function test_real10(x) @@ -129,8 +129,8 @@ function test_real10(x) end function ! ALL-LABEL: @_QPtest_real10 -! FAST: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f80) -> f80 -! RELAXED: {{%[A-Za-z0-9._]+}} = "llvm.intr.round"({{%[A-Za-z0-9._]+}}) : (f80) -> f80 +! FAST: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f80) -> f80 +! RELAXED: {{%[A-Za-z0-9._]+}} = llvm.intr.round({{%[A-Za-z0-9._]+}}) : (f80) -> f80 ! PRECISE: {{%[A-Za-z0-9._]+}} = fir.call @llvm.round.f80({{%[A-Za-z0-9._]+}}) : (f80) -> f80 ! TODO: wait until fp128 is supported well in llvm.round diff --git a/flang/test/Lower/nullify-polymoprhic.f90 b/flang/test/Lower/nullify-polymorphic.f90 similarity index 100% rename from flang/test/Lower/nullify-polymoprhic.f90 rename to flang/test/Lower/nullify-polymorphic.f90 diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index 6177845af8f34..d828c0c209349 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -49,4 +49,31 @@ subroutine check() ! CHECK: %[[BOX2:.*]] = fir.embox %[[DT2]] : (!fir.ref>) -> !fir.class> ! CHECK: %[[CLASS2:.*]] = fir.convert %[[BOX2]] : (!fir.class>) -> !fir.class> ! CHECK: fir.call @_QMpolymorphic_testPprint(%[[CLASS2]]) : (!fir.class>) -> () + + subroutine test_allocate_unlimited_polymorphic_non_derived() + class(*), pointer :: u + allocate(integer::u) + end subroutine + +! CHECK-LABEL: test_allocate_unlimited_polymorphic_non_derived +! CHECK-NOT: _FortranAPointerNullifyDerived +! 
CHECK: fir.call @_FortranAPointerAllocate + + function test_fct_ret_class() + class(p1), pointer :: test_fct_ret_class + end function + + subroutine call_fct() + class(p1), pointer :: p + p => test_fct_ret_class() + end subroutine + +! CHECK-LABEL: func.func @_QMpolymorphic_testPtest_fct_ret_class() -> !fir.class>> +! CHECK: return %{{.*}} : !fir.class>> + +! CHECK-LABEL: func.func @_QMpolymorphic_testPcall_fct() +! CHECK: %[[RESULT:.*]] = fir.alloca !fir.class>> {bindc_name = ".result"} +! CHECK: %[[CALL_RES:.*]] = fir.call @_QMpolymorphic_testPtest_fct_ret_class() : () -> !fir.class>> +! CHECK: fir.save_result %[[CALL_RES]] to %[[RESULT]] : !fir.class>>, !fir.ref>>> + end module diff --git a/flang/test/Semantics/atomic11.f90 b/flang/test/Semantics/atomic11.f90 index d46c7af1b03b5..1c50825e5541f 100644 --- a/flang/test/Semantics/atomic11.f90 +++ b/flang/test/Semantics/atomic11.f90 @@ -1,10 +1,9 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 -! XFAIL: * ! This test checks for semantic errors in atomic_xor subroutine calls based on ! the interface defined in section 16.9.30 of the Fortran 2018 standard. 
program test_atomic_xor - use iso_fortran_env, only: atomic_int_kind + use iso_fortran_env, only: atomic_int_kind, atomic_logical_kind implicit none integer(kind=atomic_int_kind) :: scalar_coarray[*], non_scalar_coarray(10)[*], val, non_coarray @@ -13,6 +12,7 @@ program test_atomic_xor integer(kind=1) :: kind1_coarray[*] real :: non_integer_coarray[*] logical :: non_integer + logical(atomic_logical_kind) :: atomic_logical[*] !___ standard-conforming calls ___ call atomic_xor(scalar_coarray, val) @@ -27,13 +27,16 @@ program test_atomic_xor !___ non-standard-conforming calls ___ - !ERROR: 'atom=' argument must be a scalar coarray for intrinsic 'atomic_xor' + !ERROR: 'atom=' argument must be a scalar coarray or coindexed object for intrinsic 'atomic_xor' call atomic_xor(non_scalar_coarray, val) - !ERROR: 'atom=' argument must be a coarray or a coindexed object for intrinsic 'atomic_xor' + !ERROR: 'atom=' argument must be a scalar coarray or coindexed object for intrinsic 'atomic_xor' + call atomic_xor(non_scalar_coarray[1], val) + + !ERROR: 'atom=' argument must be a scalar coarray or coindexed object for intrinsic 'atomic_xor' call atomic_xor(non_coarray, val) - !ERROR: 'atom=' argument must be a coarray or a coindexed object for intrinsic 'atomic_xor' + !ERROR: 'atom=' argument must be a scalar coarray or coindexed object for intrinsic 'atomic_xor' call atomic_xor(array, val) !ERROR: Actual argument for 'atom=' must have kind=atomic_int_kind, but is 'INTEGER(4)' @@ -45,6 +48,9 @@ program test_atomic_xor !ERROR: Actual argument for 'atom=' has bad type 'REAL(4)' call atomic_xor(non_integer_coarray, val) + !ERROR: Actual argument for 'atom=' has bad type 'LOGICAL(8)' + call atomic_xor(atomic_logical, val) + !ERROR: 'value=' argument has unacceptable rank 1 call atomic_xor(scalar_coarray, array) @@ -57,9 +63,11 @@ program test_atomic_xor !ERROR: 'stat=' argument has unacceptable rank 1 call atomic_xor(scalar_coarray, val, status_array) + !ERROR: 'stat' argument to 
'atomic_xor' may not be a coindexed object call atomic_xor(scalar_coarray, val, coindexed_status[1]) - !ERROR: Actual argument associated with INTENT(OUT) dummy argument 'stat=' must be definable + !ERROR: Actual argument associated with INTENT(OUT) dummy argument 'stat=' is not definable + !BECAUSE: '1_4' is not a variable or pointer call atomic_xor(scalar_coarray, val, 1) !ERROR: missing mandatory 'atom=' argument diff --git a/flang/test/lib/Analysis/AliasAnalysis/CMakeLists.txt b/flang/test/lib/Analysis/AliasAnalysis/CMakeLists.txt new file mode 100644 index 0000000000000..c4b3838c9a23e --- /dev/null +++ b/flang/test/lib/Analysis/AliasAnalysis/CMakeLists.txt @@ -0,0 +1,29 @@ +# Exclude tests from libMLIR.so +add_flang_library(FIRTestAnalysis + TestAliasAnalysis.cpp + + DEPENDS + FIRDialect + FIRBuilder + FIRSupport + FIRTransforms + FIRAnalysis + ${dialect_libs} + + LINK_LIBS + FIRDialect + FIRBuilder + FIRSupport + FIRTransforms + FIRAnalysis + ${dialect_libs} + MLIRFuncDialect + MLIRLLVMDialect + MLIRAnalysis + MLIRTestAnalysis + ) + +target_include_directories(FIRTestAnalysis + PRIVATE + ${MLIR_MAIN_SRC_DIR}/.. + ) \ No newline at end of file diff --git a/flang/test/lib/Analysis/AliasAnalysis/TestAliasAnalysis.cpp b/flang/test/lib/Analysis/AliasAnalysis/TestAliasAnalysis.cpp new file mode 100644 index 0000000000000..39aaf8fba180a --- /dev/null +++ b/flang/test/lib/Analysis/AliasAnalysis/TestAliasAnalysis.cpp @@ -0,0 +1,72 @@ +//===- TestAliasAnalysis.cpp - Test FIR alias analysis -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/test/lib/Analysis/TestAliasAnalysis.h" +#include "mlir/Analysis/AliasAnalysis.h" +#include "mlir/Pass/Pass.h" +#include "flang/Optimizer/Analysis/AliasAnalysis.h" + +using namespace mlir; + +namespace { + +//===----------------------------------------------------------------------===// +// Testing AliasResult +//===----------------------------------------------------------------------===// + +struct TestFIRAliasAnalysisPass + : public test::TestAliasAnalysisBase, + PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestFIRAliasAnalysisPass) + + StringRef getArgument() const final { return "test-fir-alias-analysis"; } + StringRef getDescription() const final { + return "Test alias analysis results."; + } + void runOnOperation() override { + mlir::AliasAnalysis aliasAnalysis(getOperation()); + aliasAnalysis.addAnalysisImplementation(fir::AliasAnalysis()); + runAliasAnalysisOnOperation(getOperation(), aliasAnalysis); + } +}; + +//===----------------------------------------------------------------------===// +// Testing ModRefResult +//===----------------------------------------------------------------------===// + +struct TestFIRAliasAnalysisModRefPass + : public test::TestAliasAnalysisModRefBase, + PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestFIRAliasAnalysisModRefPass) + + StringRef getArgument() const final { + return "test-fir-alias-analysis-modref"; + } + StringRef getDescription() const final { + return "Test alias analysis ModRef results."; + } + void runOnOperation() override { + mlir::AliasAnalysis aliasAnalysis(getOperation()); + aliasAnalysis.addAnalysisImplementation(fir::AliasAnalysis()); + runAliasAnalysisOnOperation(getOperation(), aliasAnalysis); + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// 
Pass Registration +//===----------------------------------------------------------------------===// + +namespace fir { +namespace test { +void registerTestFIRAliasAnalysisPass() { + PassRegistration(); + PassRegistration(); +} +} // namespace test +} // namespace fir \ No newline at end of file diff --git a/flang/test/lib/Analysis/AliasAnalysis/alias-analysis-1.fir b/flang/test/lib/Analysis/AliasAnalysis/alias-analysis-1.fir new file mode 100644 index 0000000000000..4ed492ebae2e4 --- /dev/null +++ b/flang/test/lib/Analysis/AliasAnalysis/alias-analysis-1.fir @@ -0,0 +1,21 @@ +// RUN: fir-opt %s -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' -split-input-file 2>&1 | FileCheck %s + +// CHECK-LABEL: Testing : "_QPtest" +// CHECK-DAG: alloca_1#0 <-> address_of#0: MayAlias +func.func @_QPtest(%arg1: !fir.ref) { + %c1_i32 = arith.constant 1 : i32 + %0 = fir.alloca () -> () {test.ptr = "alloca_1"} + %1 = fir.address_of(@_QPf) {test.ptr = "address_of"} : () -> i32 + %2 = fir.convert %1 : (() -> i32) -> (() -> ()) + %4 = fir.convert %0 : (!fir.ref<() -> ()>) -> !fir.llvm_ptr<() -> ()> + fir.store %2 to %4 : !fir.llvm_ptr<() -> ()> + %6 = fir.load %0 : !fir.ref<() -> ()> + fir.call @_QPs(%6) : (() -> ()) -> () + return +} + +// ----- +func.func private @_QPs(%arg0: () -> ()) + +// ----- +func.func private @_QPf() -> i32 diff --git a/flang/test/lib/Analysis/CMakeLists.txt b/flang/test/lib/Analysis/CMakeLists.txt new file mode 100644 index 0000000000000..2b313d6c615aa --- /dev/null +++ b/flang/test/lib/Analysis/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(AliasAnalysis) diff --git a/flang/test/lib/CMakeLists.txt b/flang/test/lib/CMakeLists.txt new file mode 100644 index 0000000000000..fc6ef10fab1f5 --- /dev/null +++ b/flang/test/lib/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(Analysis) diff --git a/flang/test/lib/lit.local.cfg b/flang/test/lib/lit.local.cfg new file mode 100644 index 0000000000000..9832f42447387 --- /dev/null +++ 
b/flang/test/lib/lit.local.cfg @@ -0,0 +1,7 @@ + +# Excluding .cpp file from the extensions since from this level down they are used for the development +config.suffixes = ['.c', '.f', '.F', '.ff', '.FOR', '.for', '.f77', '.f90', '.F90', + '.ff90', '.f95', '.F95', '.ff95', '.fpp', '.FPP', '.cuf', + '.CUF', '.f18', '.F18', '.f03', '.F03', '.f08', '.F08', + '.ll', '.fir', '.mlir'] + diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt index adbdb23739dd0..1914c370a407e 100644 --- a/flang/tools/fir-opt/CMakeLists.txt +++ b/flang/tools/fir-opt/CMakeLists.txt @@ -2,12 +2,20 @@ add_flang_tool(fir-opt fir-opt.cpp) llvm_update_compile_flags(fir-opt) get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +if(FLANG_INCLUDE_TESTS) + set(test_libs + FIRTestAnalysis + ) +endif() + target_link_libraries(fir-opt PRIVATE FIRDialect FIRSupport FIRTransforms FIRCodeGen HLFIRDialect + FIRAnalysis + ${test_libs} ${dialect_libs} # TODO: these should be transitive dependencies from a target providing diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp index dc3a0a4b84764..a35960be75e6b 100644 --- a/flang/tools/fir-opt/fir-opt.cpp +++ b/flang/tools/fir-opt/fir-opt.cpp @@ -17,11 +17,19 @@ #include "flang/Optimizer/Transforms/Passes.h" using namespace mlir; +namespace fir { +namespace test { +void registerTestFIRAliasAnalysisPass(); +} // namespace test +} // namespace fir int main(int argc, char **argv) { fir::support::registerMLIRPassesForFortranTools(); fir::registerOptCodeGenPasses(); fir::registerOptTransformPasses(); +#ifdef FLANG_INCLUDE_TESTS + fir::test::registerTestFIRAliasAnalysisPass(); +#endif DialectRegistry registry; fir::support::registerDialects(registry); return failed(MlirOptMain(argc, argv, "FIR modular optimizer driver\n", diff --git a/flang/tools/flang-driver/driver.cpp b/flang/tools/flang-driver/driver.cpp index 28a8db2584b5c..e4880b62ee857 100644 --- a/flang/tools/flang-driver/driver.cpp +++
b/flang/tools/flang-driver/driver.cpp @@ -72,8 +72,8 @@ static int executeFC1Tool(llvm::SmallVectorImpl &argV) { return 1; } -static void ExpandResponseFiles( - llvm::StringSaver &saver, llvm::SmallVectorImpl &args) { +static void ExpandResponseFiles(llvm::StringSaver &saver, + llvm::SmallVectorImpl &args) { // We're defaulting to the GNU syntax, since we don't have a CL mode. llvm::cl::TokenizerCallback tokenizer = &llvm::cl::TokenizeGNUCommandLine; llvm::cl::ExpansionContext ExpCtx(saver.getAllocator(), tokenizer); @@ -96,8 +96,8 @@ int main(int argc, const char **argv) { ExpandResponseFiles(saver, args); // Check if flang-new is in the frontend mode - auto firstArg = std::find_if( - args.begin() + 1, args.end(), [](const char *a) { return a != nullptr; }); + auto firstArg = std::find_if(args.begin() + 1, args.end(), + [](const char *a) { return a != nullptr; }); if (firstArg != args.end()) { if (llvm::StringRef(args[1]).startswith("-cc1")) { llvm::errs() << "error: unknown integrated tool '" << args[1] << "'. 
" @@ -127,7 +127,8 @@ int main(int argc, const char **argv) { // Prepare the driver clang::driver::Driver theDriver(driverPath, - llvm::sys::getDefaultTargetTriple(), diags, "flang LLVM compiler"); + llvm::sys::getDefaultTargetTriple(), diags, + "flang LLVM compiler"); theDriver.setTargetAndMode(targetandMode); std::unique_ptr c( theDriver.BuildCompilation(args)); diff --git a/flang/unittests/Frontend/CMakeLists.txt b/flang/unittests/Frontend/CMakeLists.txt index 739412c7888c4..0a05b3ffd743e 100644 --- a/flang/unittests/Frontend/CMakeLists.txt +++ b/flang/unittests/Frontend/CMakeLists.txt @@ -12,6 +12,7 @@ target_link_libraries(FlangFrontendTests clangBasic flangFrontend flangFrontendTool + FortranLower FortranParser FortranSemantics FortranCommon diff --git a/flang/unittests/Frontend/FrontendActionTest.cpp b/flang/unittests/Frontend/FrontendActionTest.cpp index 9b289523e761b..f2727656b5eb0 100644 --- a/flang/unittests/Frontend/FrontendActionTest.cpp +++ b/flang/unittests/Frontend/FrontendActionTest.cpp @@ -178,6 +178,11 @@ TEST_F(FrontendActionTest, EmitLLVM) { compInst.getInvocation().getTargetOpts().triple = llvm::Triple::normalize(llvm::sys::getDefaultTargetTriple()); + // Initialise LLVM backend + llvm::InitializeAllTargets(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllAsmPrinters(); + // Set-up the output stream. We are using output buffer wrapped as an output // stream, as opposed to an actual file (or a file descriptor). 
llvm::SmallVector outputFileBuffer; diff --git a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp index 83d3defd3d067..9defe496b9c05 100644 --- a/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp +++ b/flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp @@ -528,3 +528,58 @@ TEST_F(FIRBuilderTest, getBaseTypeOf) { EXPECT_TRUE(fir::isDerivedWithLenParameters(array)); } } + +TEST_F(FIRBuilderTest, genArithFastMath) { + auto builder = getBuilder(); + auto ctx = builder.getContext(); + auto loc = builder.getUnknownLoc(); + + auto realTy = mlir::FloatType::getF32(ctx); + auto arg = builder.create(loc, realTy); + + // Test that FastMathFlags is 'none' by default. + mlir::Operation *op1 = builder.create(loc, arg, arg); + auto op1_fmi = + mlir::dyn_cast_or_null(op1); + EXPECT_TRUE(op1_fmi); + auto op1_fmf = op1_fmi.getFastMathFlagsAttr().getValue(); + EXPECT_EQ(op1_fmf, arith::FastMathFlags::none); + + // Test that the builder is copied properly. + fir::FirOpBuilder builder_copy(builder); + + arith::FastMathFlags FMF1 = + arith::FastMathFlags::contract | arith::FastMathFlags::reassoc; + builder.setFastMathFlags(FMF1); + arith::FastMathFlags FMF2 = + arith::FastMathFlags::nnan | arith::FastMathFlags::ninf; + builder_copy.setFastMathFlags(FMF2); + + // Modifying FastMathFlags for the copy must not affect the original builder. + mlir::Operation *op2 = builder.create(loc, arg, arg); + auto op2_fmi = + mlir::dyn_cast_or_null(op2); + EXPECT_TRUE(op2_fmi); + auto op2_fmf = op2_fmi.getFastMathFlagsAttr().getValue(); + EXPECT_EQ(op2_fmf, FMF1); + + // Modifying FastMathFlags for the original builder must not affect the copy. + mlir::Operation *op3 = + builder_copy.create(loc, arg, arg); + auto op3_fmi = + mlir::dyn_cast_or_null(op3); + EXPECT_TRUE(op3_fmi); + auto op3_fmf = op3_fmi.getFastMathFlagsAttr().getValue(); + EXPECT_EQ(op3_fmf, FMF2); + + // Test that the builder copy inherits FastMathFlags from the original. 
+ fir::FirOpBuilder builder_copy2(builder); + + mlir::Operation *op4 = + builder_copy2.create(loc, arg, arg); + auto op4_fmi = + mlir::dyn_cast_or_null(op4); + EXPECT_TRUE(op4_fmi); + auto op4_fmf = op4_fmi.getFastMathFlagsAttr().getValue(); + EXPECT_EQ(op4_fmf, FMF1); +} diff --git a/flang/unittests/Optimizer/FIRContextTest.cpp b/flang/unittests/Optimizer/FIRContextTest.cpp index 5976f2c4979f3..7e1b97bbbd4f2 100644 --- a/flang/unittests/Optimizer/FIRContextTest.cpp +++ b/flang/unittests/Optimizer/FIRContextTest.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "flang/Optimizer/Support/FIRContext.h" -#include "flang/Optimizer/Support/KindMapping.h" +#include "gtest/gtest.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinOps.h" +#include "flang/Optimizer/Support/KindMapping.h" #include "llvm/Support/Host.h" -#include "gtest/gtest.h" #include using namespace fir; diff --git a/flang/unittests/Runtime/Time.cpp b/flang/unittests/Runtime/Time.cpp index 479f82ffe524c..ceccb4a70805c 100644 --- a/flang/unittests/Runtime/Time.cpp +++ b/flang/unittests/Runtime/Time.cpp @@ -166,4 +166,3 @@ TEST(TimeIntrinsics, DateAndTime) { EXPECT_LE(minutes, 59); } } - diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake index 131bbad7c9c64..5eb3d4e67b78d 100644 --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -436,24 +436,6 @@ function(add_integration_test test_name) libc.utils.IntegrationTest.test) list(REMOVE_DUPLICATES fq_deps_list) - # We don't want memory functions to be dependencies on integration tests. - # Memory functions should be tested using unittests. The main reason - # however is that compiler codegen can emit calls to memory functions. So, - # we add them explicitly to the integration test libc.a (see below). 
Adding - # explicit deps on the memory functions can potentially cause duplicate - # symbol errors. - set(memory_funcs "bcmp;bzero;memcmp;memcpy;memmove;memset") - foreach(dep IN LISTS fq_deps_list) - get_target_property(name ${dep} ENTRYPOINT_NAME) - if(NOT name) - continue() - endif() - list(FIND memory_funcs ${name} loc) - if(${loc} GREATER_EQUAL 0) - message(FATAL_ERROR "Memory function ${name} cannot be a dependency " - "for integration tests.") - endif() - endforeach() # TODO: Instead of gathering internal object files from entrypoints, # collect the object files with public names of entrypoints. get_object_files_for_test( diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 7b0fb53451004..2544ff2a5e40a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -376,6 +376,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.ferror libc.src.stdio.ferror_unlocked libc.src.stdio.fgetc + libc.src.stdio.fgetc_unlocked libc.src.stdio.fgets libc.src.stdio.fflush libc.src.stdio.fopen @@ -385,17 +386,23 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.fread libc.src.stdio.fread_unlocked libc.src.stdio.fseek + libc.src.stdio.ftell libc.src.stdio.funlockfile libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked libc.src.stdio.fprintf + libc.src.stdio.getc + libc.src.stdio.getc_unlocked libc.src.stdio.printf libc.src.stdio.putc libc.src.stdio.putchar libc.src.stdio.puts + libc.src.stdio.setbuf + libc.src.stdio.setvbuf libc.src.stdio.stderr libc.src.stdio.stdin libc.src.stdio.stdout + libc.src.stdio.ungetc # stdlib.h entrypoints libc.src.stdlib._Exit diff --git a/libc/docs/api_test.rst b/libc/docs/api_test.rst index b63adb3e64574..e39d506c3a92f 100644 --- a/libc/docs/api_test.rst +++ b/libc/docs/api_test.rst @@ -1,5 +1,8 @@ +.. 
_api_test: + +======== API Test -===================== +======== The implementation of libc-project is unique because our public C header files are generated using information from ground truth captured in TableGen files. Unit tests only exercise the internal C++ implementations and don't ensure the diff --git a/libc/docs/build_and_test.rst b/libc/docs/build_and_test.rst new file mode 100644 index 0000000000000..423481ecc3a59 --- /dev/null +++ b/libc/docs/build_and_test.rst @@ -0,0 +1,43 @@ +.. _build_and_test: + +============================= +Building and Testing the libc +============================= + +The libc can be built and tested in two different modes: + +#. **The overlay mode** - In this mode, one uses the static archive from LLVM's + libc along with the system libc. See :ref:`overlay_mode` for more details + on building and using the libc in this mode. You can only run the libc + unittests in this mode. To run them, one simply does: + + .. code-block:: sh + + $> ninja check-libc + + Note that, unittests for only those functions which are part of the overlay + static archive will be run with the above command. + +#. **The full build mode** - In this mode, the libc is used as the only libc + for the user's application. See :ref:`fullbuild_mode` for more details on + building and using the libc in this mode. Once configured for a full libc + build, you can run three kinds of tests: + + #. Unit tests - You can run unittests by the command: + + .. code-block:: sh + + $> ninja check-libc + + #. Integration tests - You can run integration tests by the command: + + .. code-block:: sh + + $> ninja libc-integration-tests + + #. API verification test - See :ref:`api_test` for more information about + the API test. It can be run by the command: + + .. 
code-block:: sh + + $> ninja libc-api-test diff --git a/libc/docs/clang_tidy_checks.rst b/libc/docs/clang_tidy_checks.rst index 01480672c465b..67ab5fc65c902 100644 --- a/libc/docs/clang_tidy_checks.rst +++ b/libc/docs/clang_tidy_checks.rst @@ -1,3 +1,5 @@ +.. _clang_tidy_checks: + LLVM libc clang-tidy checks =========================== These are the clang-tidy checks designed to help enforce implementation diff --git a/libc/docs/build_system.rst b/libc/docs/cmake_build_rules.rst similarity index 91% rename from libc/docs/build_system.rst rename to libc/docs/cmake_build_rules.rst index b55f92a2e7bfc..dfa9f7a6d7d41 100644 --- a/libc/docs/build_system.rst +++ b/libc/docs/cmake_build_rules.rst @@ -1,5 +1,8 @@ -LLVM libc build rules -===================== +.. _cmake_build_rules: + +=========================== +The libc CMake build system +=========================== At the cost of verbosity, we want to keep the build system of LLVM libc as simple as possible. We also want to be highly modular with our build diff --git a/libc/docs/code_style.rst b/libc/docs/code_style.rst new file mode 100644 index 0000000000000..9efacc4bbece3 --- /dev/null +++ b/libc/docs/code_style.rst @@ -0,0 +1,22 @@ +.. _code_style: + +=================== +The libc code style +=================== + +For the most part, the libc project follows the general `coding standards of +the LLVM project `_. The libc +project differs from that standard with respect to the naming style. The +differences are as follows: + +#. **Non-const variables** - This includes function arguments, struct and + class data members, non-const globals and local variables. They all use the + ``snake_case`` style. +#. **const and constexpr variables** - They use the capitalized + ``SNAKE_CASE`` irrespective of whether they are local or global. +#. **Functions and methods** - They use the ``snake_case`` style like the + non-const variables. +#. **Internal type names** - These are types which are internal to the libc + implementation.
They use the ``CapitalizedCamelCase`` style. +#. **Public names** - These are the names as prescribed by the standards and + will follow the style as prescribed by the standards. diff --git a/libc/docs/contributing.rst b/libc/docs/contributing.rst new file mode 100644 index 0000000000000..65ba9a4079704 --- /dev/null +++ b/libc/docs/contributing.rst @@ -0,0 +1,56 @@ +.. _contributing: + +================================ +Contributing to the libc Project +================================ + +LLVM's libc is being developed as part of the LLVM project so contributions +to the libc project should also follow the general LLVM +`contribution guidelines `_. Below is +a list of open projects that one can start with: + +#. **Cleanup code-style** - The libc project follows the general + `LLVM style `_ but differs in a + few aspects: We use ``snake_case`` for non-constant variable and function + names, ``CamelCase`` for internal type names (those which are not defined in a + public header), and ``CAPITALIZED_SNAKE_CASE`` for constants. When we started + working on the project, we started using the general LLVM style for + everything. However, for a short period, we switched to the style that is + currently followed by the `LLD project `_. + But, considering that we implement a lot of functions and types whose names + are prescribed by the standards, we have settled on the style described above. + However, we have not switched over to this style in all parts of the ``libc`` + directory. So, a simple but mechanical project would be to move the parts + following the old styles to the new style. + +#. **Integrating with the rest of the LLVM project** - There are two parts to + this project: + + #. One is about adding CMake facilities to optionally link the libc's overlay + static archive (see :ref:`overlay_mode`) with other LLVM tools/executables. + #.
The other is about putting plumbing in place to release the overlay static + archive (see :ref:`overlay_mode`) as part of the LLVM binary releases. + +#. **Implement Linux syscall wrappers** - A large portion of the POSIX API can + be implemented as syscall wrappers on Linux. A good number have already been + implemented but many more are yet to be implemented. So, a project of medium + complexity would be to implement syscall wrappers which have not yet been + implemented. + +#. **Add a better random number generator** - The current random number + generator has a very small range. This has to be improved or switched over + to a fast random number generator with a large range. + +#. **Update the clang-tidy lint rules and use them in the build and/or CI** - + Currently, the :ref:`clang_tidy_checks` have gone stale and are mostly unused + by the developers and on the CI builders. This project is about updating + them and reintegrating them back with the build and running them on the + CI builders. + +#. **double and higher precision math functions** - These are under active + development but you can take a shot at those not yet implemented. See + :ref:`math` for more information. + +#. **Contribute a new OS/Architecture port** - You can contribute a new + operating system or target architecture port. See :ref:`porting` for more + information.
diff --git a/libc/docs/date_and_time.rst b/libc/docs/date_and_time.rst index 9439da26e0d21..cadeabcb49478 100644 --- a/libc/docs/date_and_time.rst +++ b/libc/docs/date_and_time.rst @@ -8,8 +8,7 @@ Date and Time Functions Source location --------------- -- The main source for string functions is located at: - ``libc/src/time`` +- The main source for time functions is located at: ``libc/src/time`` --------------------- Implementation Status diff --git a/libc/docs/developer_guides.rst b/libc/docs/developer_guides.rst new file mode 100644 index 0000000000000..e7e05e1cc0348 --- /dev/null +++ b/libc/docs/developer_guides.rst @@ -0,0 +1,21 @@ +.. _developer_guides: + +================ +Developer Guides +================ + +Navigate to the links below for information on the respective topics: + +.. toctree:: + + code_style + source_tree_layout + entrypoints + cmake_build_rules + clang_tidy_checks + fuzzing + ground_truth_specification + header_generation + implementation_standard + api_test + mechanics_of_public_api diff --git a/libc/docs/entrypoints.rst b/libc/docs/entrypoints.rst index dfc0aeca6fea4..3c24a922a2947 100644 --- a/libc/docs/entrypoints.rst +++ b/libc/docs/entrypoints.rst @@ -1,3 +1,5 @@ +.. _entrypoints: + Entrypoints in LLVM libc ------------------------ diff --git a/libc/docs/fullbuild_mode.rst b/libc/docs/fullbuild_mode.rst index 635dc8b66417f..a5c51aa4cb079 100644 --- a/libc/docs/fullbuild_mode.rst +++ b/libc/docs/fullbuild_mode.rst @@ -4,4 +4,80 @@ Fullbuild Mode ============== -Coming soon, stay tuned! +.. contents:: Table of Contents + :depth: 4 + :local: + +The *fullbuild* mode of LLVM's libc is the mode in which it is being used as +the only libc (as opposed to the :ref:`overlay_mode` in which it is used along +with the system libc.) Hence, to start using it that way, you will have to build +and install the ``libc.a`` static archive from LLVM's libc as well as the +start-up objects and public headers provided by it. 
In this document, we will +present a way to set up a *sysroot* (see the documentation of the ``--sysroot`` +option here: ``_) +which includes not only the components of LLVM's libc, but also a full LLVM-only +toolchain consisting of the `clang `_ compiler, the +`lld `_ linker and the +`compiler-rt `_ runtime libraries. LLVM's libc +is not yet complete enough to allow using and linking a C++ application against +a C++ standard library (like libc++). Hence, we do not include a C++ standard +library in the sysroot. + +.. note:: When the libc is complete enough, we should be able to include + `libc++ `_, libcxx-abi and libunwind in the + toolchain and use them to build and link C++ applications. + +Building the full libc +====================== + +LLVM's libc uses `Scudo `_ +as its allocator. So, when building the full libc, we should specify that we +want Scudo to be included in the libc. Since the libc currently only supports +static linking, we also specify that we do not want a shared library for Scudo. +A typical ``cmake`` configure step will look like this: + +.. code-block:: sh + + $> cd llvm-project # The llvm-project checkout + $> mkdir build + $> cd build + $> cmake ../llvm -G Ninja \ + -DLLVM_ENABLE_PROJECTS="clang;libc;lld;compiler-rt" \ + -DCMAKE_BUILD_TYPE= \ # Select build type + -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \ + -DLLVM_LIBC_FULL_BUILD=ON \ # We want the full libc + -DLLVM_LIBC_INCLUDE_SCUDO=ON \ # Include Scudo in the libc + -DCOMPILER_RT_BUILD_SCUDO_STANDALONE_WITH_LLVM_LIBC=ON \ + -DCOMPILER_RT_BUILD_GWP_ASAN=OFF \ + -DCOMPILER_RT_SCUDO_STANDALONE_BUILD_SHARED=OFF \ + -DCMAKE_INSTALL_PREFIX= # Specify a sysroot directory + +Since we want to include ``clang``, ``lld`` and ``compiler-rt`` in our +toolchain, we list them in ``LLVM_ENABLE_PROJECTS`` along with ``libc``. The +option ``CMAKE_INSTALL_PREFIX`` specifies the sysroot directory in which to +install the new toolchain.
+ +Installation +============ + +To build and install the libc, clang (and its support libraries and builtins), +lld and compiler-rt, run the following command after the above ``cmake`` +command: + +.. code-block:: sh + + $> ninja install-clang install-builtins install-compiler-rt \ + install-core-resource-headers install-libc install-lld + +Once the above command completes successfully, the ```` directory you +have specified with the CMake configure step above will contain a full LLVM-only +toolchain with which you can build practical/real-world C applications. See +``_ for examples +of how to start using this new toolchain. + +Linux Headers +============= + +If you are using the full libc on Linux, then you will also need to install +Linux headers in your sysroot. It is left to the reader to figure out the best +way to install Linux headers on the system they want to use the full libc on. diff --git a/libc/docs/index.rst b/libc/docs/index.rst index 4a1825b5921c3..c298f00c1e99b 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -68,16 +68,10 @@ stages there is no ABI stability in any form. :maxdepth: 1 :caption: Development - build_system - clang_tidy_checks - entrypoints - fuzzing - ground_truth_specification - header_generation - implementation_standard - api_test - mechanics_of_public_api - source_layout + build_and_test + developer_guides + porting + contributing .. toctree:: :hidden: @@ -86,4 +80,6 @@ stages there is no ABI stability in any form. Source Code Bug Reports + Discourse + Discord Channel Buildbot diff --git a/libc/docs/math.rst b/libc/docs/math.rst index 329280bb305f7..dd965fe6629ec 100644 --- a/libc/docs/math.rst +++ b/libc/docs/math.rst @@ -1,3 +1,5 @@ +.. 
_math: + ============== Math Functions ============== diff --git a/libc/docs/overlay_mode.rst b/libc/docs/overlay_mode.rst index 4c5eaf939487c..0039e67f6384e 100644 --- a/libc/docs/overlay_mode.rst +++ b/libc/docs/overlay_mode.rst @@ -59,14 +59,14 @@ can follow up the build step with an install step: $> ninja install-llvmlibc -Building the static archive as part of the runtimes build ---------------------------------------------------------- +Building the static archive as part of the bootstrap build +---------------------------------------------------------- -The runtimes build is a build mode in which runtime components like libc++, +The bootstrap build is a build mode in which runtime components like libc++, libcxx-abi, libc etc. are built using the ToT clang. The idea is that this build produces an in-sync toolchain of compiler + runtime libraries. Such a synchrony is not essential for the libc but can one still build the overlay static archive -as part of the runtimes build if one wants to. The first step is to configure +as part of the bootstrap build if one wants to. The first step is to configure appropriately: .. code-block:: sh diff --git a/libc/docs/porting.rst b/libc/docs/porting.rst new file mode 100644 index 0000000000000..42f7fa33bc334 --- /dev/null +++ b/libc/docs/porting.rst @@ -0,0 +1,120 @@ +.. _porting: + +======================================= +Bringup on a New OS or Architecture +======================================= + +.. contents:: Table of Contents + :depth: 4 + :local: + +CI builders +=========== + +If you are contributing a port for an operating system or architecture which +is not covered by existing CI builders, you will also have to present a plan +for testing and contribute a CI builder. See +`this guide `_ for information +on how to add new builders to the +`LLVM buildbot `_. +You will either have to extend the existing +`Linux script `_ +and/or +`Windows script `_ +or add a new script for your operating system.
+ +An OS specific config directory +=============================== + +If you are starting to bring up LLVM's libc on a new operating system, the first +step is to add a directory for that OS in the ``libc/config`` directory. Both +`Linux `_ and +`Windows `_, +the two operating systems on which LLVM's libc is being actively developed, +have their own config directory. + +.. note:: Windows development is not as active as the development on Linux. + There is a + `Darwin `_ + config also which is in a similar state as Windows. + +.. note:: LLVM's libc is being brought up on the + `Fuchsia `_ operating system also. However, there is no + config directory for Fuchsia as the bring up is being done in the Fuchsia + source tree. + +The api.td file +--------------- + +If the :ref:`fullbuild_mode` is to be supported on the new operating system, +then a file named ``api.td`` should be added in its config directory. It is +written in the +`LLVM tablegen language `_. +It lists all the relevant macros and type definitions we want in the +public libc header files. See the existing Linux +`api.td `_ +file as an example to prepare the ``api.td`` file for the new operating system. + +.. note:: In future, LLVM tablegen will be replaced with a different DSL to list + config information. + +Architecture Subdirectory +========================= + +There are parts of the libc which are implemented differently for different +architectures. The simplest example of this is the ``syscall`` function and +its internal implementation - its Linux implementation differs for different +architectures. Since a large part of the libc makes use of syscalls (or an +equivalent on non-Linux like platforms), it might be simpler and convenient to +bring up the libc for one architecture at a time. 
In such cases, wherein the +support surface of LLVM's libc differs for each target architecture, one will +have to add a subdirectory (within the config directory of the operating +system) for each target architecture, and list the relevant config information +separately in those subdirectories. For example, for Linux, the x86_64 and +aarch64 configs are in separate directories, named +`x86_64 `_ +and `aarch64 `_. +The libc CMake machinery looks for subdirectories named after the target +architecture. + +The entrypoints.txt file +======================== + +One of the important pieces of config information is listed in a file named +``entrypoints.txt``. This file lists the targets for the entrypoints (see +:ref:`entrypoints`) you want to include in the static archive of the libc (for +the :ref:`overlay_mode` and/or the :ref:`fullbuild_mode`.) If you are doing an +architecture specific bring up, then an ``entrypoints.txt`` file should be +created in the architecture subdirectory for each architecture. Else, having a +single ``entrypoints.txt`` in the operating system directory is sufficient. + +The Linux config has an ``entrypoints.txt`` for each individual target +architecture separately: `aarch64 `_, +`arm32 `_ and +`x86_64 `_. On the +other hand, the Windows config has a single ``entrypoints.txt`` +`file `_. + +A typical bring up procedure will normally bring up a small group of entrypoints +at a time. The usual practice is to progressively add the targets for those +entrypoints to the ``entrypoints.txt`` file as they are being brought up. The +same is the case if one is implementing a new entrypoint - the target for the +new entrypoint should be added to the relevant ``entrypoints.txt`` file. If +the implementation of the new entrypoint supports multiple operating systems and +target architectures, then multiple ``entrypoints.txt`` files will have to be +updated.
+ +The headers.txt file +==================== + +Another important piece of config information is listed in a file named +``headers.txt``. It lists the targets for the set of public headers that are +provided by the libc. This is relevant only if the libc is to be used in the +:ref:`fullbuild_mode` on the target operating system and architecture. As with +the ``entrypoints.txt`` file, one ``headers.txt`` file should be listed for +each individual target architecture if you are doing an architecture specific +bring up. The Linux config has a ``headers.txt`` file listed separately for the +`aarch64 `_ +config and the +`x86_64 `_ +config. diff --git a/libc/docs/source_layout.rst b/libc/docs/source_tree_layout.rst similarity index 98% rename from libc/docs/source_layout.rst rename to libc/docs/source_tree_layout.rst index 960a060d9b7b7..a0565cb713080 100644 --- a/libc/docs/source_layout.rst +++ b/libc/docs/source_tree_layout.rst @@ -1,3 +1,6 @@ +.. _source_tree_layout: + +============================ LLVM-libc Source Tree Layout ============================ diff --git a/libc/examples/README.md b/libc/examples/README.md new file mode 100644 index 0000000000000..36b886090c6c1 --- /dev/null +++ b/libc/examples/README.md @@ -0,0 +1,79 @@ +Examples +======== +This directory contains a few example programs which illustrate how one can set +up their own projects to use LLVM's libc, either as an overlay or as the only +libc in their projects. See +[the usage mode document](https://libc.llvm.org/usage_modes.html) for more +information about the different modes in which one can build and use the libc. + +Building the Examples +===================== +Each example has its own directory which contains the source code and the CMake +build set up.
To build an example, create a directory named `build` in the +example's directory: + +```bash +cd <example directory> +mkdir build +cd build +``` + +Each example can be built to use the libc in either +[the overlay mode](https://libc.llvm.org/overlay_mode.html) or the +[full build mode](https://libc.llvm.org/fullbuild_mode.html). The CMake +configure step differs slightly depending on the mode you want to use the libc +in. + +Building against an overlay libc +-------------------------------- + +Before you can link an example against the overlay libc, you will have to +install it. See [the documentation of the overlay mode](https://libc.llvm.org/overlay_mode.html) +to learn how to install the libc's overlay static archive named `libllvmlibc.a`. +Once installed, to build an example against it, you have to specify the directory +in which the static archive is installed with the option +`LIBC_OVERLAY_ARCHIVE_DIR`: + +```bash +cmake ../ -G <GEN> \ + -DLIBC_OVERLAY_ARCHIVE_DIR=<dir in which libc is installed> +``` + +Next, if `Ninja` is used for `<GEN>`, you can build the example as follows: + +```bash +ninja +``` + +Building against a full libc +---------------------------- + +Before you can link an example against the full libc, you will have to first +install it. See [the documentation of the full build mode](https://libc.llvm.org/fullbuild_mode.html) +to learn how to install a full libc along with the other LLVM toolchain pieces +like `clang`, `lld` and `compiler-rt`. The CMake build for the examples will +assume that you have all of these components installed in a special sysroot +(see description of the `--sysroot` option +[here](https://gcc.gnu.org/onlinedocs/gcc/Directory-Options.html).)
Once you +have installed them, you have to inform CMake that we are linking against the +full libc as follows: + +```bash +cmake ../ -G -DLIBC_FULLBUILD=ON \ + -DCMAKE_SYSROOT= \ + -DCMAKE_C_COMPILER=/bin/clang \ + -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY +``` + +`` is the path to the sysroot directory you have set up while +installing the full libc. The option +`-DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY` tells CMake to not attempt +linking full executables against shared libraries. We have to use this as LLVM's +libc does not yet have support for shared libraries and dynamic linking. After +the above `cmake` command, assuming `Ninja` was used for ``, you can build +the example as follows: + + +```bash +ninja +``` diff --git a/libc/examples/examples.cmake b/libc/examples/examples.cmake new file mode 100644 index 0000000000000..81e99e3cbede9 --- /dev/null +++ b/libc/examples/examples.cmake @@ -0,0 +1,16 @@ +function(add_example name) + add_executable( + ${name} + ${ARGN} + ) + + if(LIBC_FULLBUILD) + target_link_options(${name} PRIVATE -static -rtlib=compiler-rt -fuse-ld=lld) + elseif(LIBC_OVERLAY_ARCHIVE_DIR) + target_link_directories(${name} PRIVATE ${LIBC_OVERLAY_ARCHIVE_DIR}) + target_link_options(${name} PRIVATE -l:libllvmlibc.a) + else() + message(FATAL_ERROR "Either LIBC_FULLBUILD should be on or " + "LIBC_OVERLAY_ARCHIVE_DIR should be set.") + endif() +endfunction() diff --git a/libc/examples/hello_world/.gitignore b/libc/examples/hello_world/.gitignore new file mode 100644 index 0000000000000..0bda4771f3297 --- /dev/null +++ b/libc/examples/hello_world/.gitignore @@ -0,0 +1,10 @@ +#==============================================================================# +# This file specifies intentionally untracked files that git should ignore. +# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html +# +# This file is intentionally different from the output of `git svn show-ignore`, +# as most of those are useless. 
+#==============================================================================# + +# Nested build directory +/build* diff --git a/libc/examples/hello_world/CMakeLists.txt b/libc/examples/hello_world/CMakeLists.txt new file mode 100644 index 0000000000000..89bf35c0340d0 --- /dev/null +++ b/libc/examples/hello_world/CMakeLists.txt @@ -0,0 +1,8 @@ +project(hello_world) +cmake_minimum_required(VERSION 3.13.4) +include(../examples.cmake) + +add_example( + hello_world + hello_world.c +) diff --git a/libc/examples/hello_world/hello_world.c b/libc/examples/hello_world/hello_world.c new file mode 100644 index 0000000000000..d065bdbb5a5bf --- /dev/null +++ b/libc/examples/hello_world/hello_world.c @@ -0,0 +1,14 @@ +//===-- libc example - hello world ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +int main() { + printf("Hello, World\n"); + return 0; +} diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index ac4e57572a696..f9d3ee5164f95 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -162,6 +162,7 @@ add_gen_header( .llvm-libc-macros.stdio_macros .llvm-libc-types.cookie_io_functions_t .llvm-libc-types.FILE + .llvm-libc-types.off_t .llvm-libc-types.size_t ) diff --git a/libc/spec/gnu_ext.td b/libc/spec/gnu_ext.td index ebadbb6f12c4c..239790bb9bdc5 100644 --- a/libc/spec/gnu_ext.td +++ b/libc/spec/gnu_ext.td @@ -137,6 +137,11 @@ def GnuExtensions : StandardSpec<"GNUExtensions"> { ArgSpec, ArgSpec] >, + FunctionSpec< + "fgetc_unlocked", + RetValSpec, + [ArgSpec] + >, ] >; diff --git a/libc/spec/posix.td b/libc/spec/posix.td index 43b3319059c98..0333c6e47c4d3 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -1011,6 
+1011,11 @@ def POSIX : StandardSpec<"POSIX"> { RetValSpec, [ArgSpec] >, + FunctionSpec< + "getc_unlocked", + RetValSpec, + [ArgSpec] + >, ] >; diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index a7a9df46747f4..4d1295313e5ae 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -563,6 +563,16 @@ def StdC : StandardSpec<"stdc"> { [ArgSpec, ArgSpec] >, + FunctionSpec< + "ftell", + RetValSpec, + [ArgSpec] + >, + FunctionSpec< + "getc", + RetValSpec, + [ArgSpec] + >, FunctionSpec< "putc", RetValSpec, @@ -613,6 +623,16 @@ def StdC : StandardSpec<"stdc"> { RetValSpec, [ArgSpec] >, + FunctionSpec< + "setbuf", + RetValSpec, + [ArgSpec, ArgSpec] + >, + FunctionSpec< + "setvbuf", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec, ArgSpec] + >, FunctionSpec< "sprintf", RetValSpec, @@ -641,6 +661,11 @@ def StdC : StandardSpec<"stdc"> { ArgSpec, ArgSpec] >, + FunctionSpec< + "ungetc", + RetValSpec, + [ArgSpec, ArgSpec] + >, ], [ ObjectSpec< diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 83495359d7068..aebc9b4a2f89f 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -10,6 +10,8 @@ add_header_library( builtin_wrappers HDRS builtin_wrappers.h + DEPENDS + libc.src.__support.CPP.type_traits ) add_header_library( diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp index b5d00b7a876da..edb2467929f2d 100644 --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -203,10 +203,14 @@ size_t File::read_unlocked(void *data, size_t len) { for (size_t i = 0; i < available_data; ++i) dataref[i] = bufref[i + pos]; read_limit = pos = 0; // Reset the pointers. + // Update the dataref to reflect that fact that we have already + // copied |available_data| into |data|. 
+ dataref = cpp::span(dataref.data() + available_data, + dataref.size() - available_data); size_t to_fetch = len - available_data; if (to_fetch > bufsize) { - size_t fetched_size = platform_read(this, data, to_fetch); + size_t fetched_size = platform_read(this, dataref.data(), to_fetch); if (fetched_size < to_fetch) { if (errno == 0) eof = true; @@ -233,6 +237,44 @@ size_t File::read_unlocked(void *data, size_t len) { return transfer_size + available_data; } +int File::ungetc_unlocked(int c) { + // There is no meaning to unget if: + // 1. You are trying to push back EOF. + // 2. Read operations are not allowed on this file. + // 3. The previous operation was a write operation. + if (c == EOF || !read_allowed() || (prev_op == FileOp::WRITE)) + return EOF; + + cpp::span bufref(static_cast(buf), bufsize); + if (read_limit == 0) { + // If |read_limit| is zero, it can mean three things: + // a. This file was just created. + // b. The previous operation was a seek operation. + // c. The previous operation was a read operation which emptied + // the buffer. + // For all the above cases, we simply write |c| at the beginning + // of the buffer and bump |read_limit|. Note that |pos| will also + // be zero in this case, so we don't need to adjust it. + bufref[0] = static_cast(c); + ++read_limit; + } else { + // If |read_limit| is non-zero, it means that there is data in the buffer + // from a previous read operation. Which would also mean that |pos| is not + // zero. So, we decrement |pos| and write |c| in to the buffer at the new + // |pos|. If too many ungetc operations are performed without reads, it + // can lead to (pos == 0 but read_limit != 0). We will just error out in + // such a case. + if (pos == 0) + return EOF; + --pos; + bufref[pos] = static_cast(c); + } + + eof = false; // There is atleast one character that can be read now. + err = false; // This operation was a success. 
+ return c; +} + int File::seek(long offset, int whence) { FileLock lock(this); if (prev_op == FileOp::WRITE && pos > 0) { @@ -252,7 +294,28 @@ int File::seek(long offset, int whence) { // Reset the eof flag as a seek might move the file positon to some place // readable. eof = false; - return platform_seek(this, offset, whence); + long platform_pos = platform_seek(this, offset, whence); + if (platform_pos >= 0) + return 0; + else + return -1; +} + +long File::tell() { + FileLock lock(this); + long platform_offset; + if (eof) + platform_offset = platform_seek(this, 0, SEEK_END); + else + platform_offset = platform_seek(this, 0, SEEK_CUR); + if (platform_offset < 0) + return -1; + if (prev_op == FileOp::READ) + return platform_offset - (read_limit - pos); + else if (prev_op == FileOp::WRITE) + return platform_offset + pos; + else + return platform_offset; } int File::flush_unlocked() { @@ -288,12 +351,49 @@ int File::close() { return 0; } -void File::set_buffer(void *buffer, size_t size, bool owned) { - if (own_buf) - free(buf); - buf = static_cast(buffer); - bufsize = size; - own_buf = owned; +int File::set_buffer(void *buffer, size_t size, int buffer_mode) { + // We do not need to lock the file as this method should be called before + // other operations are performed on the file. + + if (buffer != nullptr && size == 0) + return EINVAL; + + switch (buffer_mode) { + case _IOFBF: + case _IOLBF: + case _IONBF: + break; + default: + return EINVAL; + } + + if (buffer == nullptr && size != 0 && buffer_mode != _IONBF) { + // We exclude the case of buffer_mode == _IONBF in this branch + // because we don't need to allocate buffer in such a case. + if (own_buf) { + buf = realloc(buf, size); + } else { + buf = malloc(size); + own_buf = true; + } + bufsize = size; + // TODO: Handle allocation failures. + } else { + if (own_buf) + free(buf); + if (buffer_mode != _IONBF) { + buf = static_cast(buffer); + bufsize = size; + } else { + // We don't need any buffer. 
+ buf = nullptr; + bufsize = 0; + } + own_buf = false; + } + bufmode = buffer_mode; + adjust_buf(); + return 0; } File::ModeFlags File::mode_flags(const char *mode) { diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h index 74655b1301b85..d182ea4c81b4d 100644 --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -28,7 +28,9 @@ class File { using WriteFunc = size_t(File *, const void *, size_t); using ReadFunc = size_t(File *, void *, size_t); - using SeekFunc = int(File *, long, int); + // The SeekFunc is expected to return the current offset of the external + // file position indicator. + using SeekFunc = long(File *, long, int); using CloseFunc = int(File *); using FlushFunc = int(File *); @@ -71,6 +73,11 @@ class File { Mutex mutex; + // For files which are readable, we should be able to support one ungetc + // operation even if |buf| is nullptr. So, in the constructor of File, we + // set |buf| to point to this buffer character. + char ungetc_buf; + void *buf; // Pointer to the stream buffer for buffered streams size_t bufsize; // Size of the buffer pointed to by |buf|. @@ -111,13 +118,13 @@ class File { }; protected: - bool write_allowed() const { + constexpr bool write_allowed() const { return mode & (static_cast(OpenMode::WRITE) | static_cast(OpenMode::APPEND) | static_cast(OpenMode::PLUS)); } - bool read_allowed() const { + constexpr bool read_allowed() const { return mode & (static_cast(OpenMode::READ) | static_cast(OpenMode::PLUS)); } @@ -125,15 +132,21 @@ class File { public: // We want this constructor to be constexpr so that global file objects // like stdout do not require invocation of the constructor which can - // potentially lead to static initialization order fiasco. + // potentially lead to static initialization order fiasco. Consequently, + // we will assume that the |buffer| and |buffer_size| argument are + // meaningful - that is, |buffer| is nullptr if and only if |buffer_size| + // is zero. 
This way, we will not have to employ the semantics of + // the set_buffer method and allocate a buffer. constexpr File(WriteFunc *wf, ReadFunc *rf, SeekFunc *sf, CloseFunc *cf, FlushFunc *ff, void *buffer, size_t buffer_size, int buffer_mode, bool owned, ModeFlags modeflags) : platform_write(wf), platform_read(rf), platform_seek(sf), platform_close(cf), platform_flush(ff), mutex(false, false, false), - buf(buffer), bufsize(buffer_size), bufmode(buffer_mode), own_buf(owned), - mode(modeflags), pos(0), prev_op(FileOp::NONE), read_limit(0), - eof(false), err(false) {} + ungetc_buf(0), buf(buffer), bufsize(buffer_size), bufmode(buffer_mode), + own_buf(owned), mode(modeflags), pos(0), prev_op(FileOp::NONE), + read_limit(0), eof(false), err(false) { + adjust_buf(); + } // This function helps initialize the various fields of the File data // structure after a allocating memory for it via a call to malloc. @@ -156,6 +169,8 @@ class File { f->prev_op = FileOp::NONE; f->read_limit = f->pos = 0; f->eof = f->err = false; + + f->adjust_buf(); } // Buffered write of |len| bytes from |data| without the file lock. @@ -178,6 +193,8 @@ class File { int seek(long offset, int whence); + long tell(); + // If buffer has data written to it, flush it out. Does nothing if the // buffer is currently being used as a read buffer. int flush() { @@ -187,10 +204,25 @@ class File { int flush_unlocked(); + // Returns EOF on error and keeps the file unchanged. + int ungetc_unlocked(int c); + + int ungetc(int c) { + FileLock lock(this); + return ungetc_unlocked(c); + } + // Sets the internal buffer to |buffer| with buffering mode |mode|. - // |size| is the size of |buffer|. This new |buffer| is owned by the - // stream only if |owned| is true. - void set_buffer(void *buffer, size_t size, bool owned); + // |size| is the size of |buffer|. If |size| is non-zero, but |buffer| + // is nullptr, then a buffer owned by this file will be allocated. + // Else, |buffer| will not be owned by this file. 
+ // + // Will return zero on success, or an error value on failure. Will fail + // if: + // 1. |buffer| is not a nullptr but |size| is zero. + // 2. |buffer_mode| is not one of _IOLBF, IOFBF or _IONBF. + // In both the above cases, error returned in EINVAL. + int set_buffer(void *buffer, size_t size, int buffer_mode); // Closes the file stream and frees up all resources owned by it. int close(); @@ -227,12 +259,38 @@ class File { size_t write_unlocked_lbf(const uint8_t *data, size_t len); size_t write_unlocked_fbf(const uint8_t *data, size_t len); size_t write_unlocked_nbf(const uint8_t *data, size_t len); + + constexpr void adjust_buf() { + if (read_allowed() && (buf == nullptr || bufsize == 0)) { + // We should allow atleast one ungetc operation. + // This might give an impression that a buffer will be used even when + // the user does not want a buffer. But, that will not be the case. + // For reading, the buffering does not come into play. For writing, let + // us take up the three different kinds of buffering separately: + // 1. If user wants _IOFBF but gives a zero buffer, buffering still + // happens in the OS layer until the user flushes. So, from the user's + // point of view, this single byte buffer does not affect their + // experience. + // 2. If user wants _IOLBF but gives a zero buffer, the reasoning is + // very similar to the _IOFBF case. + // 3. If user wants _IONBF, then the buffer is ignored for writing. + // So, all of the above cases, having a single ungetc buffer does not + // affect the behavior experienced by the user. + buf = &ungetc_buf; + bufsize = 1; + own_buf = false; // We shouldn't call free on |buf| when closing the file. + } + } }; // The implementaiton of this function is provided by the platfrom_file // library. File *openfile(const char *path, const char *mode); +// The platform_file library should implement it if it relevant for that +// platform. 
+int get_fileno(File *f); + extern File *stdin; extern File *stdout; extern File *stderr; diff --git a/libc/src/__support/File/linux_file.cpp b/libc/src/__support/File/linux_file.cpp index c6c93c8ef5086..09a880743baf4 100644 --- a/libc/src/__support/File/linux_file.cpp +++ b/libc/src/__support/File/linux_file.cpp @@ -22,7 +22,7 @@ namespace { size_t write_func(File *, const void *, size_t); size_t read_func(File *, void *, size_t); -int seek_func(File *, long, int); +long seek_func(File *, long, int); int close_func(File *); int flush_func(File *); @@ -71,10 +71,12 @@ size_t read_func(File *f, void *buf, size_t size) { return ret; } -int seek_func(File *f, long offset, int whence) { +long seek_func(File *f, long offset, int whence) { auto *lf = reinterpret_cast(f); + long result; #ifdef SYS_lseek long ret = __llvm_libc::syscall_impl(SYS_lseek, lf->get_fd(), offset, whence); + result = ret; #elif defined(SYS__llseek) long result; long ret = __llvm_libc::syscall_impl(SYS__llseek, lf->get_fd(), offset >> 32, @@ -87,7 +89,7 @@ int seek_func(File *f, long offset, int whence) { errno = -ret; return -1; } - return 0; + return result; } int close_func(File *f) { @@ -164,6 +166,11 @@ File *openfile(const char *path, const char *mode) { return file; } +int get_fileno(File *f) { + auto *lf = reinterpret_cast(f); + return lf->get_fd(); +} + constexpr size_t STDIN_BUFFER_SIZE = 512; char stdin_buffer[STDIN_BUFFER_SIZE]; static LinuxFile StdIn(0, stdin_buffer, STDIN_BUFFER_SIZE, _IOFBF, false, diff --git a/libc/src/__support/builtin_wrappers.h b/libc/src/__support/builtin_wrappers.h index d30feb92f97b9..108100f0880ec 100644 --- a/libc/src/__support/builtin_wrappers.h +++ b/libc/src/__support/builtin_wrappers.h @@ -10,6 +10,8 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_BUILTIN_WRAPPERS_H #define LLVM_LIBC_SRC_SUPPORT_BUILTIN_WRAPPERS_H +#include "src/__support/CPP/type_traits.h" + namespace __llvm_libc { // The following overloads are matched based on what is accepted by @@ -64,6 +66,60 @@ 
template static inline int unsafe_clz(T val) { return __internal::clz(val); } +// Add with carry +template +inline constexpr cpp::enable_if_t< + cpp::is_integral_v && cpp::is_unsigned_v, T> +add_with_carry(T a, T b, T carry_in, T &carry_out) { + T tmp = a + carry_in; + T sum = b + tmp; + carry_out = (sum < b) || (tmp < a); + return sum; +} + +#if __has_builtin(__builtin_addc) +// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins + +template <> +inline unsigned char add_with_carry(unsigned char a, + unsigned char b, + unsigned char carry_in, + unsigned char &carry_out) { + return __builtin_addcb(a, b, carry_in, &carry_out); +} + +template <> +inline unsigned short +add_with_carry(unsigned short a, unsigned short b, + unsigned short carry_in, + unsigned short &carry_out) { + return __builtin_addcs(a, b, carry_in, &carry_out); +} + +template <> +inline unsigned int add_with_carry(unsigned int a, unsigned int b, + unsigned int carry_in, + unsigned int &carry_out) { + return __builtin_addc(a, b, carry_in, &carry_out); +} + +template <> +inline unsigned long add_with_carry(unsigned long a, + unsigned long b, + unsigned long carry_in, + unsigned long &carry_out) { + return __builtin_addcl(a, b, carry_in, &carry_out); +} + +template <> +inline unsigned long long +add_with_carry(unsigned long long a, unsigned long long b, + unsigned long long carry_in, + unsigned long long &carry_out) { + return __builtin_addcll(a, b, carry_in, &carry_out); +} +#endif // __has_builtin(__builtin_addc) + } // namespace __llvm_libc #endif // LLVM_LIBC_SRC_SUPPORT_BUILTIN_WRAPPERS_H diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 22536a515bd58..d4b39767473e5 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -113,6 +113,42 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + fgetc_unlocked + SRCS + fgetc_unlocked.cpp + HDRS + fgetc_unlocked.h + DEPENDS + 
libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + getc + SRCS + getc.cpp + HDRS + getc.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + getc_unlocked + SRCS + getc_unlocked.cpp + HDRS + getc_unlocked.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( fgets SRCS @@ -282,6 +318,18 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ungetc.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( fopencookie SRCS @@ -329,6 +377,32 @@ add_entrypoint_object( libc.src.__support.File.platform_file ) +add_entrypoint_object( + setbuf + SRCS + setbuf.cpp + HDRS + setbuf.h + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + setvbuf + SRCS + setvbuf.cpp + HDRS + setvbuf.h + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( sprintf SRCS @@ -378,6 +452,18 @@ add_entrypoint_object( libc.src.stdio.printf_core.vfprintf_internal ) +add_entrypoint_object( + ftell + SRCS + ftell.cpp + HDRS + ftell.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( remove ALIAS diff --git a/libc/src/stdio/fgetc_unlocked.cpp b/libc/src/stdio/fgetc_unlocked.cpp new file mode 100644 index 0000000000000..d61493b5bba10 --- /dev/null +++ b/libc/src/stdio/fgetc_unlocked.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of fgetc_unlocked ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fgetc_unlocked.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, fgetc_unlocked, (::FILE * stream)) { + unsigned char c; + size_t r = + reinterpret_cast<__llvm_libc::File *>(stream)->read_unlocked(&c, 1); + if (r != 1) + return EOF; + return c; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/fgetc_unlocked.h b/libc/src/stdio/fgetc_unlocked.h new file mode 100644 index 0000000000000..e374a6d0f6ce1 --- /dev/null +++ b/libc/src/stdio/fgetc_unlocked.h @@ -0,0 +1,20 @@ +//===-- Implementation header of fgetc_unlocked -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_FGETC_UNLOCKED_H +#define LLVM_LIBC_SRC_STDIO_FGETC_UNLOCKED_H + +#include + +namespace __llvm_libc { + +int fgetc_unlocked(::FILE *f); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_FGETC_UNLOCKED_H diff --git a/libc/src/stdio/fopencookie.cpp b/libc/src/stdio/fopencookie.cpp index 6facc969a2e5e..85f6de7595ce9 100644 --- a/libc/src/stdio/fopencookie.cpp +++ b/libc/src/stdio/fopencookie.cpp @@ -39,14 +39,18 @@ size_t read_func(File *f, void *data, size_t size) { reinterpret_cast(data), size); } -int seek_func(File *f, long offset, int whence) { +long seek_func(File *f, long offset, int whence) { auto cookie_file = reinterpret_cast(f); if (cookie_file->ops.seek == nullptr) { errno = EINVAL; return -1; } off64_t offset64 = offset; - return cookie_file->ops.seek(cookie_file->cookie, &offset64, whence); + int result = cookie_file->ops.seek(cookie_file->cookie, &offset64, whence); + if (result == 0) + return offset64; + else + return -1; } int close_func(File *f) { diff --git a/libc/src/stdio/ftell.cpp b/libc/src/stdio/ftell.cpp new file mode 100644 index 0000000000000..40783ac58fca4 --- /dev/null +++ b/libc/src/stdio/ftell.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of ftell -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ftell.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(long, ftell, (::FILE * stream)) { + return reinterpret_cast<__llvm_libc::File *>(stream)->tell(); +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/ftell.h b/libc/src/stdio/ftell.h new file mode 100644 index 0000000000000..95d4494709181 --- /dev/null +++ b/libc/src/stdio/ftell.h @@ -0,0 +1,20 @@ +//===-- Implementation header of ftell --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_FTELL_H +#define LLVM_LIBC_SRC_STDIO_FTELL_H + +#include + +namespace __llvm_libc { + +long ftell(::FILE *f); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_FTELL_H diff --git a/libc/src/stdio/getc.cpp b/libc/src/stdio/getc.cpp new file mode 100644 index 0000000000000..406e83f2b3627 --- /dev/null +++ b/libc/src/stdio/getc.cpp @@ -0,0 +1,24 @@ +//===-- Implementation of getc --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/getc.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, getc, (::FILE * stream)) { + unsigned char c; + size_t r = reinterpret_cast<__llvm_libc::File *>(stream)->read(&c, 1); + if (r != 1) + return EOF; + return c; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/getc.h b/libc/src/stdio/getc.h new file mode 100644 index 0000000000000..b5de6a7585807 --- /dev/null +++ b/libc/src/stdio/getc.h @@ -0,0 +1,20 @@ +//===-- Implementation header of getc ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_GETC_H +#define LLVM_LIBC_SRC_STDIO_GETC_H + +#include + +namespace __llvm_libc { + +int getc(::FILE *f); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_GETC_H diff --git a/libc/src/stdio/getc_unlocked.cpp b/libc/src/stdio/getc_unlocked.cpp new file mode 100644 index 0000000000000..48adba5ff4118 --- /dev/null +++ b/libc/src/stdio/getc_unlocked.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of getc_unlocked ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/getc_unlocked.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, getc_unlocked, (::FILE * stream)) { + unsigned char c; + size_t r = + reinterpret_cast<__llvm_libc::File *>(stream)->read_unlocked(&c, 1); + if (r != 1) + return EOF; + return c; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/getc_unlocked.h b/libc/src/stdio/getc_unlocked.h new file mode 100644 index 0000000000000..b318dfc934e90 --- /dev/null +++ b/libc/src/stdio/getc_unlocked.h @@ -0,0 +1,20 @@ +//===-- Implementation header of getc_unlocked ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_GETC_UNLOCKED_H +#define LLVM_LIBC_SRC_STDIO_GETC_UNLOCKED_H + +#include + +namespace __llvm_libc { + +int getc_unlocked(::FILE *f); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_GETC_UNLOCKED_H diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp index 38d2e1e69cf92..6a27c1c854824 100644 --- a/libc/src/stdio/printf_core/parser.cpp +++ b/libc/src/stdio/printf_core/parser.cpp @@ -151,7 +151,11 @@ FormatSection Parser::get_next_section() { section.has_conv = false; break; } - ++cur_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the cur_pos. 
+ if (str[cur_pos] != '\0') + ++cur_pos; + } else { // raw section section.has_conv = false; @@ -372,7 +376,10 @@ Parser::TypeDesc Parser::get_type_desc(size_t index) { if (conv_index == index) return conv_size; } - ++local_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the local_pos. + if (str[local_pos] != '\0') + ++local_pos; } // If there is no size for the requested index, then just guess that it's an diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt index 3941d40a838c7..940e9f0d083f3 100644 --- a/libc/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/src/stdio/scanf_core/CMakeLists.txt @@ -23,3 +23,69 @@ add_object_library( libc.src.__support.CPP.bitset libc.src.__support.CPP.string_view ) + +if(NOT (TARGET libc.src.__support.File.file)) + # Not all platforms have a file implementation. If file is unavailable, + # then we must skip all the parts that need file. + return() +endif() + +add_object_library( + scanf_main + SRCS + scanf_main.cpp + HDRS + scanf_main.h + DEPENDS + .parser + .reader + .converter + .core_structs + libc.src.__support.arg_list +) + +add_object_library( + string_reader + SRCS + string_reader.cpp + HDRS + string_reader.h +) + +add_object_library( + file_reader + SRCS + file_reader.cpp + HDRS + file_reader.h + DEPENDS + libc.src.__support.File.file +) + +add_object_library( + reader + SRCS + reader.cpp + HDRS + reader.h + DEPENDS + .string_reader + .file_reader +) + +add_object_library( + converter + SRCS + converter.cpp + string_converter.cpp + HDRS + converter.h + string_converter.h + DEPENDS + .reader + .core_structs + libc.src.__support.ctype_utils + libc.src.__support.CPP.bitset + libc.src.__support.CPP.string_view + libc.src.__support.CPP.limits +) diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp new file mode 100644 index 0000000000000..3cfa8758349ec --- /dev/null +++
b/libc/src/stdio/scanf_core/converter.cpp @@ -0,0 +1,98 @@ +//===-- Format specifier converter implementation for scanf -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/converter.h" + +#include "src/__support/ctype_utils.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" + +#include "src/stdio/scanf_core/string_converter.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int convert(Reader *reader, const FormatSection &to_conv) { + int ret_val = 0; + switch (to_conv.conv_name) { + case '%': + return raw_match(reader, "%"); + case 's': + ret_val = raw_match(reader, " "); + if (ret_val != READ_OK) + return ret_val; + return convert_string(reader, to_conv); + case 'c': + case '[': + return convert_string(reader, to_conv); + // case 'd': + // case 'i': + // case 'u': + // case 'o': + // case 'x': + // case 'X': + // ret_val = raw_match(reader, " "); + // if (ret_val != READ_OK) + // return ret_val; + // return convert_int(reader, to_conv); + // #ifndef LLVM_LIBC_SCANF_DISABLE_FLOAT + // case 'f': + // case 'F': + // case 'e': + // case 'E': + // case 'a': + // case 'A': + // case 'g': + // case 'G': + // ret_val = raw_match(reader, " "); + // if (ret_val != READ_OK) + // return ret_val; + // return convert_float(reader, to_conv); + // #endif // LLVM_LIBC_SCANF_DISABLE_FLOAT + // #ifndef LLVM_LIBC_SCANF_DISABLE_WRITE_INT + // case 'n': + // return convert_write_int(reader, to_conv); + // #endif // LLVM_LIBC_SCANF_DISABLE_WRITE_INT + // case 'p': + // ret_val = raw_match(reader, " "); + // if (ret_val != READ_OK) + // return ret_val; + // return convert_pointer(reader, to_conv); + default: + return raw_match(reader,
to_conv.raw_string); + } + return -1; +} + +// raw_string is assumed to have a positive size. +int raw_match(Reader *reader, cpp::string_view raw_string) { + char cur_char = reader->getc(); + int ret_val = READ_OK; + for (size_t i = 0; i < raw_string.size(); ++i) { + // Any space character matches any number of space characters. + if (internal::isspace(raw_string[i])) { + while (internal::isspace(cur_char)) { + cur_char = reader->getc(); + } + } else { + if (raw_string[i] == cur_char) { + cur_char = reader->getc(); + } else { + ret_val = MATCHING_FAILURE; + break; + } + } + } + reader->ungetc(cur_char); + return ret_val; +} + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/converter.h b/libc/src/stdio/scanf_core/converter.h new file mode 100644 index 0000000000000..cd91ff66a3aed --- /dev/null +++ b/libc/src/stdio/scanf_core/converter.h @@ -0,0 +1,33 @@ +//===-- Format specifier converter for scanf -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H + +#include "src/__support/CPP/string_view.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +// convert will call a conversion function to convert the FormatSection into +// its string representation, and then that will write the result to the +// reader. +int convert(Reader *reader, const FormatSection &to_conv); + +// raw_match takes a raw string and matches it to the characters obtained from +// the reader. 
+int raw_match(Reader *reader, cpp::string_view raw_string); + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H diff --git a/libc/src/stdio/scanf_core/core_structs.h b/libc/src/stdio/scanf_core/core_structs.h index 213a5e1a2b59a..7f331db362023 100644 --- a/libc/src/stdio/scanf_core/core_structs.h +++ b/libc/src/stdio/scanf_core/core_structs.h @@ -78,7 +78,7 @@ struct FormatSection { enum ErrorCodes : int { // This is the value to be returned by conversions when no error has occurred. - WRITE_OK = 0, + READ_OK = 0, // These are the scanf return values for when an error has occurred. They are // all negative, and should be distinct. FILE_READ_ERROR = -1, diff --git a/libc/src/stdio/scanf_core/file_reader.cpp b/libc/src/stdio/scanf_core/file_reader.cpp new file mode 100644 index 0000000000000..f39c3b9ab8412 --- /dev/null +++ b/libc/src/stdio/scanf_core/file_reader.cpp @@ -0,0 +1,26 @@ +//===-- FILE Reader implementation for scanf --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/file_reader.h" +#include "src/__support/File/file.h" +#include + +namespace __llvm_libc { +namespace scanf_core { + +char FileReader::get_char() { + char tiny_buff = 0; + if (file->read_unlocked(&tiny_buff, 1) != 1) + return 0; + return tiny_buff; +} + +void FileReader::unget_char(char c) { file->ungetc_unlocked(c); } + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/file_reader.h b/libc/src/stdio/scanf_core/file_reader.h new file mode 100644 index 0000000000000..5e97eb604e66b --- /dev/null +++ b/libc/src/stdio/scanf_core/file_reader.h @@ -0,0 +1,38 @@ +//===-- FILE Reader definition for scanf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H + +#include "src/__support/File/file.h" + +#include +#include + +namespace __llvm_libc { +namespace scanf_core { + +class FileReader { + __llvm_libc::File *file; + +public: + FileReader(::FILE *init_file) { + file = reinterpret_cast<__llvm_libc::File *>(init_file); + file->lock(); + } + + ~FileReader() { file->unlock(); } + + char get_char(); + void unget_char(char c); +}; + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H diff --git a/libc/src/stdio/scanf_core/parser.cpp b/libc/src/stdio/scanf_core/parser.cpp index 31dd118ad17a7..76e658e376e05 100644 --- a/libc/src/stdio/scanf_core/parser.cpp +++ b/libc/src/stdio/scanf_core/parser.cpp @@ -74,7 +74,14 @@ FormatSection Parser::get_next_section() { section.output_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index); } - ++cur_pos; + // If the end of the format section is on the '\0'. This means we need to + // not advance the cur_pos and we should not count this as having a + // conversion. + if (str[cur_pos] != '\0') { + ++cur_pos; + } else { + section.has_conv = false; + } // If the format is a bracketed one, then we need to parse out the insides // of the brackets. diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp new file mode 100644 index 0000000000000..0d8d5a30f7c4d --- /dev/null +++ b/libc/src/stdio/scanf_core/reader.cpp @@ -0,0 +1,37 @@ +//===-- Reader definition for scanf -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/reader.h" +#include + +namespace __llvm_libc { +namespace scanf_core { + +char Reader::getc() { + ++cur_chars_read; + if (reader_type == ReaderType::String) { + return string_reader->get_char(); + } else { + return file_reader->get_char(); + } +} + +void Reader::ungetc(char c) { + --cur_chars_read; + if (reader_type == ReaderType::String) { + // The string reader ignores the char c passed to unget since it doesn't + // need to place anything back into a buffer, and modifying the source + // string would be dangerous. + return string_reader->unget_char(); + } else { + return file_reader->unget_char(c); + } +} + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h new file mode 100644 index 0000000000000..4ca25cc0d0cab --- /dev/null +++ b/libc/src/stdio/scanf_core/reader.h @@ -0,0 +1,53 @@ +//===-- Reader definition for scanf -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H + +#include "src/stdio/scanf_core/file_reader.h" +#include "src/stdio/scanf_core/string_reader.h" +#include + +namespace __llvm_libc { +namespace scanf_core { + +enum class ReaderType { String, File }; + +class Reader final { + union { + StringReader *string_reader; + FileReader *file_reader; + }; + + const ReaderType reader_type; + + size_t cur_chars_read = 0; + +public: + Reader(StringReader *init_string_reader) + : string_reader(init_string_reader), reader_type(ReaderType::String) {} + + Reader(FileReader *init_file_reader) + : file_reader(init_file_reader), reader_type(ReaderType::File) {} + + // This returns the next character from the input and advances it by one + // character. When it hits the end of the string or file it returns '\0' to + // signal to stop parsing. + char getc(); + + // This moves the input back by one character, placing c into the buffer if + // this is a file reader, else c is ignored. + void ungetc(char c); + + size_t chars_read() { return cur_chars_read; } +}; + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp new file mode 100644 index 0000000000000..fcf7af2083f22 --- /dev/null +++ b/libc/src/stdio/scanf_core/scanf_main.cpp @@ -0,0 +1,47 @@ +//===-- Starting point for scanf --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/scanf_main.h" + +#include "src/__support/arg_list.h" +#include "src/stdio/scanf_core/converter.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/parser.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int scanf_main(Reader *reader, const char *__restrict str, + internal::ArgList &args) { + Parser parser(str, args); + int ret_val = READ_OK; + int conversions = 0; + for (FormatSection cur_section = parser.get_next_section(); + !cur_section.raw_string.empty() && ret_val == READ_OK; + cur_section = parser.get_next_section()) { + if (cur_section.has_conv) { + ret_val = convert(reader, cur_section); + conversions += ret_val == READ_OK ? 1 : 0; + } else { + ret_val = raw_match(reader, cur_section.raw_string); + } + } + + if (conversions == 0 && ret_val != READ_OK) { + // This is intended to be converted to EOF in the client call to avoid + // including stdio.h in this internal file. + return -1; + } + return conversions; +} + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/scanf_main.h b/libc/src/stdio/scanf_core/scanf_main.h new file mode 100644 index 0000000000000..d1db46b7c77dc --- /dev/null +++ b/libc/src/stdio/scanf_core/scanf_main.h @@ -0,0 +1,26 @@ +//===-- Starting point for scanf --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H + +#include "src/__support/arg_list.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int scanf_main(Reader *reader, const char *__restrict str, + internal::ArgList &args); + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H diff --git a/libc/src/stdio/scanf_core/string_converter.cpp b/libc/src/stdio/scanf_core/string_converter.cpp new file mode 100644 index 0000000000000..bdbb5c87f75e5 --- /dev/null +++ b/libc/src/stdio/scanf_core/string_converter.cpp @@ -0,0 +1,76 @@ +//===-- String type specifier converters for scanf --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/string_converter.h" + +#include "src/__support/CPP/limits.h" +#include "src/__support/ctype_utils.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int convert_string(Reader *reader, const FormatSection &to_conv) { + // %s "Matches a sequence of non-white-space characters" + + // %c "Matches a sequence of characters of exactly the number specified by the + // field width (1 if no field width is present in the directive)" + + // %[ "Matches a nonempty sequence of characters from a set of expected + // characters (the scanset)." 
+ size_t max_width = 0; + if (to_conv.max_width > 0) { + max_width = to_conv.max_width; + } else { + if (to_conv.conv_name == 'c') { + max_width = 1; + } else { + max_width = cpp::numeric_limits::max(); + } + } + + char *output = reinterpret_cast(to_conv.output_ptr); + + char cur_char = reader->getc(); + size_t i = 0; + for (; i < max_width && cur_char != '\0'; ++i) { + // If this is %s and we've hit a space, or if this is %[] and we've found + // something not in the scanset. + if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) || + (to_conv.conv_name == '[' && !to_conv.scan_set.test(cur_char))) { + break; + } + // if the NO_WRITE flag is not set, write to the output. + if ((to_conv.flags & NO_WRITE) == 0) + output[i] = cur_char; + cur_char = reader->getc(); + } + + // We always read one more character than will be used, so we have to put the + // last one back. + reader->ungetc(cur_char); + + // If this is %s or %[] + if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) { + // Always null terminate the string. This may cause a write to the + // (max_width + 1) byte, which is correct. The max width describes the max + // number of characters read from the input string, and doesn't necessarily + // correspond to the output. + output[i] = '\0'; + } + + if (i == 0) + return MATCHING_FAILURE; + return READ_OK; +} + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/string_converter.h b/libc/src/stdio/scanf_core/string_converter.h new file mode 100644 index 0000000000000..4113f5cb9a369 --- /dev/null +++ b/libc/src/stdio/scanf_core/string_converter.h @@ -0,0 +1,25 @@ +//===-- String type specifier converters for scanf --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H + +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" + +#include + +namespace __llvm_libc { +namespace scanf_core { + +int convert_string(Reader *reader, const FormatSection &to_conv); + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H diff --git a/libc/src/stdio/scanf_core/string_reader.cpp b/libc/src/stdio/scanf_core/string_reader.cpp new file mode 100644 index 0000000000000..1d728d2b9eb35 --- /dev/null +++ b/libc/src/stdio/scanf_core/string_reader.cpp @@ -0,0 +1,24 @@ +//===-- String Reader implementation for scanf ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/scanf_core/string_reader.h" +#include + +namespace __llvm_libc { +namespace scanf_core { + +char StringReader::get_char() { + char cur_char = string[cur_index]; + ++cur_index; + return cur_char; +} + +void StringReader::unget_char() { --cur_index; } + +} // namespace scanf_core +} // namespace __llvm_libc diff --git a/libc/src/stdio/scanf_core/string_reader.h b/libc/src/stdio/scanf_core/string_reader.h new file mode 100644 index 0000000000000..35550b16c3214 --- /dev/null +++ b/libc/src/stdio/scanf_core/string_reader.h @@ -0,0 +1,33 @@ +//===-- String Reader definition for scanf ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H +#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H + +#include + +namespace __llvm_libc { +namespace scanf_core { + +class StringReader { + const char *string; + size_t cur_index = 0; + +public: + StringReader(const char *init_string) { string = init_string; } + + ~StringReader() {} + + char get_char(); + void unget_char(); +}; + +} // namespace scanf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H diff --git a/libc/src/stdio/setbuf.cpp b/libc/src/stdio/setbuf.cpp new file mode 100644 index 0000000000000..b75963239216b --- /dev/null +++ b/libc/src/stdio/setbuf.cpp @@ -0,0 +1,28 @@ +//===-- Implementation of setbuf ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/setbuf.h" +#include "src/__support/File/file.h" + +#include +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(void, setbuf, + (::FILE *__restrict stream, char *__restrict buf)) { + int mode = _IOFBF; + if (buf == nullptr) + mode = _IONBF; + int err = reinterpret_cast<__llvm_libc::File *>(stream)->set_buffer( + buf, BUFSIZ, mode); + if (err != 0) + errno = err; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/setbuf.h b/libc/src/stdio/setbuf.h new file mode 100644 index 0000000000000..7a158ac0f173e --- /dev/null +++ b/libc/src/stdio/setbuf.h @@ -0,0 +1,20 @@ +//===-- Implementation header of setbuf -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SETBUF_H +#define LLVM_LIBC_SRC_STDIO_SETBUF_H + +#include + +namespace __llvm_libc { + +void setbuf(::FILE *__restrict stream, char *__restrict buf); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SETBUF_H diff --git a/libc/src/stdio/setvbuf.cpp b/libc/src/stdio/setvbuf.cpp new file mode 100644 index 0000000000000..162519fcca36b --- /dev/null +++ b/libc/src/stdio/setvbuf.cpp @@ -0,0 +1,27 @@ +//===-- Implementation of setvbuf -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/setvbuf.h" +#include "src/__support/File/file.h" + +#include +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, setvbuf, + (::FILE *__restrict stream, char *__restrict buf, int type, + size_t size)) { + int err = reinterpret_cast<__llvm_libc::File *>(stream)->set_buffer(buf, size, + type); + if (err != 0) + errno = err; + return err; +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/setvbuf.h b/libc/src/stdio/setvbuf.h new file mode 100644 index 0000000000000..bceedd8b44113 --- /dev/null +++ b/libc/src/stdio/setvbuf.h @@ -0,0 +1,21 @@ +//===-- Implementation header of setvbuf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_SETVBUF_H +#define LLVM_LIBC_SRC_STDIO_SETVBUF_H + +#include + +namespace __llvm_libc { + +int setvbuf(::FILE *__restrict stream, char *__restrict buf, int type, + size_t size); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_SETVBUF_H diff --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/ungetc.cpp new file mode 100644 index 0000000000000..de6ce0ba0683d --- /dev/null +++ b/libc/src/stdio/ungetc.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of ungetc ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ungetc.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) { + return reinterpret_cast<__llvm_libc::File *>(stream)->ungetc(c); +} + +} // namespace __llvm_libc diff --git a/libc/src/stdio/ungetc.h b/libc/src/stdio/ungetc.h new file mode 100644 index 0000000000000..b5b7acb5962c1 --- /dev/null +++ b/libc/src/stdio/ungetc.h @@ -0,0 +1,20 @@ +//===-- Implementation header of ungetc -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_UNGETC_H +#define LLVM_LIBC_SRC_STDIO_UNGETC_H + +#include + +namespace __llvm_libc { + +int ungetc(int c, ::FILE *stream); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_UNGETC_H diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index bb5bbb1e5b34d..c4f0aab5a3808 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -88,42 +88,28 @@ add_custom_command( ${LIBC_TARGET} ) -add_executable( - libc-api-test - EXCLUDE_FROM_ALL - ${public_test} +add_custom_target(libc-api-test) +set( + allocator_entrypoints + libc.src.stdlib.malloc + libc.src.stdlib.calloc + libc.src.stdlib.realloc + libc.src.stdlib.aligned_alloc + libc.src.stdlib.free ) -# Blank out default include directories to prevent accidentally including -# system headers or our own internal headers. 
-set_target_properties( - libc-api-test - PROPERTIES - INCLUDE_DIRECTORIES "" +set(api-test-entrypoints ${TARGET_LLVMLIBC_ENTRYPOINTS}) +list(REMOVE_ITEM api-test-entrypoints ${allocator_entrypoints}) +add_integration_test( + api-test + SUITE + libc-api-test + SRCS + ${public_test} + LOADER + libc.loader.linux.crt1 + DEPENDS + ${api-test-entrypoints} ) -target_link_libraries(libc-api-test ${LIBC_TARGET}) - -# Only include we need is the include for cpp::IsSame and our generated -# public headers. -target_include_directories( - libc-api-test BEFORE - PRIVATE - "${LIBC_SOURCE_DIR}/src/__support/CPP" - "${LIBC_BUILD_DIR}/include" -) -target_compile_options( - libc-api-test - PRIVATE - -ffreestanding -) -target_link_options( - libc-api-test - PRIVATE "-nostdlib" -) -set(library_files) -foreach(library_name IN LISTS "llvmlibc") - get_target_property(library_file ${library_name} "LIBRARY_FILE") - list(APPEND library_files ${library_file}) -endforeach() if(COMPILER_RESOURCE_DIR AND LLVM_LIBC_ENABLE_LINTING) add_custom_target( @@ -143,8 +129,3 @@ if(COMPILER_RESOURCE_DIR AND LLVM_LIBC_ENABLE_LINTING) ) add_dependencies(libc-api-test libc-api-test-tidy) endif() - -target_link_libraries(libc-api-test - PRIVATE - ${library_files} -) diff --git a/libc/test/src/__support/File/file_test.cpp b/libc/test/src/__support/File/file_test.cpp index 271f2dce4ef55..bdf8de3639eb6 100644 --- a/libc/test/src/__support/File/file_test.cpp +++ b/libc/test/src/__support/File/file_test.cpp @@ -26,7 +26,7 @@ class StringFile : public __llvm_libc::File { static size_t str_read(__llvm_libc::File *f, void *data, size_t len); static size_t str_write(__llvm_libc::File *f, const void *data, size_t len); - static int str_seek(__llvm_libc::File *f, long offset, int whence); + static long str_seek(__llvm_libc::File *f, long offset, int whence); static int str_close(__llvm_libc::File *f) { return 0; } static int str_flush(__llvm_libc::File *f) { return 0; } @@ -94,7 +94,7 @@ size_t 
StringFile::str_write(__llvm_libc::File *f, const void *data, return i; } -int StringFile::str_seek(__llvm_libc::File *f, long offset, int whence) { +long StringFile::str_seek(__llvm_libc::File *f, long offset, int whence) { StringFile *sf = static_cast(f); if (whence == SEEK_SET) sf->pos = offset; @@ -102,7 +102,7 @@ int StringFile::str_seek(__llvm_libc::File *f, long offset, int whence) { sf->pos += offset; if (whence == SEEK_END) sf->pos = SIZE + offset; - return 0; + return sf->pos; } StringFile *new_string_file(char *buffer, size_t buflen, int bufmode, diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 515619e2aa822..f27d7bba1c562 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -21,6 +21,54 @@ add_libc_unittest( libc.src.stdio.fwrite ) +add_libc_unittest( + ungetc_test + SUITE + libc_stdio_unittests + SRCS + ungetc_test.cpp + DEPENDS + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fseek + libc.src.stdio.fwrite + libc.src.stdio.ungetc +) + +add_libc_unittest( + setbuf_test + SUITE + libc_stdio_unittests + SRCS + setbuf_test.cpp + DEPENDS + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fwrite + libc.src.stdio.setbuf + libc.src.stdio.ungetc +) + +add_libc_unittest( + setvbuf_test + SUITE + libc_stdio_unittests + SRCS + setvbuf_test.cpp + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fwrite + libc.src.stdio.setvbuf +) + add_libc_unittest( unlocked_fileop_test SUITE @@ -168,6 +216,29 @@ add_libc_unittest( libc.src.stdio.fgetc libc.src.stdio.fopen libc.src.stdio.fwrite + libc.src.stdio.getc +) + +add_libc_unittest( + fgetc_unlocked_test + SUITE + libc_stdio_unittests + SRCS + fgetc_unlocked_test.cpp + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.stdio.fclose + 
libc.src.stdio.ferror + libc.src.stdio.ferror_unlocked + libc.src.stdio.feof + libc.src.stdio.feof_unlocked + libc.src.stdio.fgetc_unlocked + libc.src.stdio.flockfile + libc.src.stdio.fopen + libc.src.stdio.funlockfile + libc.src.stdio.fwrite + libc.src.stdio.getc_unlocked ) add_libc_unittest( @@ -187,6 +258,25 @@ add_libc_unittest( libc.src.stdio.fwrite ) +add_libc_unittest( + ftell_test + SUITE + libc_stdio_unittests + SRCS + ftell_test.cpp + DEPENDS + libc.include.errno + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fflush + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fseek + libc.src.stdio.ftell + libc.src.stdio.fwrite + libc.src.stdio.setvbuf +) + add_subdirectory(printf_core) add_subdirectory(scanf_core) add_subdirectory(testdata) diff --git a/libc/test/src/stdio/fgetc_test.cpp b/libc/test/src/stdio/fgetc_test.cpp index 50d1780840de4..5be141ecd202e 100644 --- a/libc/test/src/stdio/fgetc_test.cpp +++ b/libc/test/src/stdio/fgetc_test.cpp @@ -13,38 +13,50 @@ #include "src/stdio/fgetc.h" #include "src/stdio/fopen.h" #include "src/stdio/fwrite.h" +#include "src/stdio/getc.h" #include "utils/UnitTest/Test.h" #include #include -TEST(LlvmLibcFGetCTest, WriteAndReadCharacters) { - constexpr char FILENAME[] = "testdata/fgetc.test"; - ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); - ASSERT_FALSE(file == nullptr); - constexpr char CONTENT[] = "123456789"; - constexpr size_t WRITE_SIZE = sizeof(CONTENT) - 1; - ASSERT_EQ(WRITE_SIZE, __llvm_libc::fwrite(CONTENT, 1, WRITE_SIZE, file)); - // This is a write-only file so reads should fail. - ASSERT_EQ(__llvm_libc::fgetc(file), EOF); - // This is an error and not a real EOF. 
- ASSERT_EQ(__llvm_libc::feof(file), 0); - ASSERT_NE(__llvm_libc::ferror(file), 0); - errno = 0; - - ASSERT_EQ(0, __llvm_libc::fclose(file)); - - file = __llvm_libc::fopen(FILENAME, "r"); - ASSERT_FALSE(file == nullptr); - - for (size_t i = 0; i < WRITE_SIZE; ++i) { - int c = __llvm_libc::fgetc(file); - ASSERT_EQ(c, int('1' + i)); +class LlvmLibcGetcTest : public __llvm_libc::testing::Test { +public: + using GetcFunc = int(FILE *); + void test_with_func(GetcFunc *func, const char *filename) { + ::FILE *file = __llvm_libc::fopen(filename, "w"); + ASSERT_FALSE(file == nullptr); + constexpr char CONTENT[] = "123456789"; + constexpr size_t WRITE_SIZE = sizeof(CONTENT) - 1; + ASSERT_EQ(WRITE_SIZE, __llvm_libc::fwrite(CONTENT, 1, WRITE_SIZE, file)); + // This is a write-only file so reads should fail. + ASSERT_EQ(func(file), EOF); + // This is an error and not a real EOF. + ASSERT_EQ(__llvm_libc::feof(file), 0); + ASSERT_NE(__llvm_libc::ferror(file), 0); + errno = 0; + + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(filename, "r"); + ASSERT_FALSE(file == nullptr); + + for (size_t i = 0; i < WRITE_SIZE; ++i) { + int c = func(file); + ASSERT_EQ(c, int('1' + i)); + } + // Reading more should return EOF but not set error. + ASSERT_EQ(func(file), EOF); + ASSERT_NE(__llvm_libc::feof(file), 0); + ASSERT_EQ(__llvm_libc::ferror(file), 0); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); } - // Reading more should return EOF but not set error. 
- ASSERT_EQ(__llvm_libc::fgetc(file), EOF); - ASSERT_NE(__llvm_libc::feof(file), 0); - ASSERT_EQ(__llvm_libc::ferror(file), 0); +}; + +TEST_F(LlvmLibcGetcTest, WriteAndReadCharactersWithFgetc) { + test_with_func(&__llvm_libc::fgetc, "testdata/fgetc.test"); +} - ASSERT_EQ(0, __llvm_libc::fclose(file)); +TEST_F(LlvmLibcGetcTest, WriteAndReadCharactersWithGetc) { + test_with_func(&__llvm_libc::getc, "testdata/getc.test"); } diff --git a/libc/test/src/stdio/fgetc_unlocked_test.cpp b/libc/test/src/stdio/fgetc_unlocked_test.cpp new file mode 100644 index 0000000000000..2687b2231afd5 --- /dev/null +++ b/libc/test/src/stdio/fgetc_unlocked_test.cpp @@ -0,0 +1,67 @@ +//===-- Unittests for fgetc -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/feof.h" +#include "src/stdio/feof_unlocked.h" +#include "src/stdio/ferror.h" +#include "src/stdio/ferror_unlocked.h" +#include "src/stdio/fgetc_unlocked.h" +#include "src/stdio/flockfile.h" +#include "src/stdio/fopen.h" +#include "src/stdio/funlockfile.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/getc_unlocked.h" +#include "utils/UnitTest/Test.h" + +#include +#include + +class LlvmLibcGetcTest : public __llvm_libc::testing::Test { +public: + using GetcFunc = int(FILE *); + void test_with_func(GetcFunc *func, const char *filename) { + ::FILE *file = __llvm_libc::fopen(filename, "w"); + ASSERT_FALSE(file == nullptr); + constexpr char CONTENT[] = "123456789"; + constexpr size_t WRITE_SIZE = sizeof(CONTENT) - 1; + ASSERT_EQ(WRITE_SIZE, __llvm_libc::fwrite(CONTENT, 1, WRITE_SIZE, file)); + // This is a write-only file so reads should fail. 
+ ASSERT_EQ(func(file), EOF); + // This is an error and not a real EOF. + ASSERT_EQ(__llvm_libc::feof(file), 0); + ASSERT_NE(__llvm_libc::ferror(file), 0); + errno = 0; + + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(filename, "r"); + ASSERT_FALSE(file == nullptr); + + __llvm_libc::flockfile(file); + for (size_t i = 0; i < WRITE_SIZE; ++i) { + int c = func(file); + ASSERT_EQ(c, int('1' + i)); + } + // Reading more should return EOF but not set error. + ASSERT_EQ(func(file), EOF); + ASSERT_NE(__llvm_libc::feof_unlocked(file), 0); + ASSERT_EQ(__llvm_libc::ferror_unlocked(file), 0); + + __llvm_libc::funlockfile(file); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + } +}; + +TEST_F(LlvmLibcGetcTest, WriteAndReadCharactersWithFgetcUnlocked) { + test_with_func(&__llvm_libc::fgetc_unlocked, "testdata/fgetc_unlocked.test"); +} + +TEST_F(LlvmLibcGetcTest, WriteAndReadCharactersWithGetcUnlocked) { + test_with_func(&__llvm_libc::getc_unlocked, "testdata/getc_unlocked.test"); +} diff --git a/libc/test/src/stdio/ftell_test.cpp b/libc/test/src/stdio/ftell_test.cpp new file mode 100644 index 0000000000000..a788c759300ea --- /dev/null +++ b/libc/test/src/stdio/ftell_test.cpp @@ -0,0 +1,63 @@ +//===-- Unittests for ftell -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fflush.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fseek.h" +#include "src/stdio/ftell.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/setvbuf.h" +#include "utils/UnitTest/Test.h" + +#include + +class LlvmLibcFTellTest : public __llvm_libc::testing::Test { +protected: + void test_with_bufmode(int bufmode) { + constexpr char FILENAME[] = "testdata/ftell.test"; + // We will set a special buffer to the file so that we guarantee buffering. + constexpr size_t BUFFER_SIZE = 1024; + char buffer[BUFFER_SIZE]; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w+"); + ASSERT_FALSE(file == nullptr); + ASSERT_EQ(__llvm_libc::setvbuf(file, buffer, bufmode, BUFFER_SIZE), 0); + + // Include a few '\n' chars to test when |bufmode| is _IOLBF. + constexpr char CONTENT[] = "12\n345\n6789"; + constexpr size_t WRITE_SIZE = sizeof(CONTENT) - 1; + ASSERT_EQ(WRITE_SIZE, __llvm_libc::fwrite(CONTENT, 1, WRITE_SIZE, file)); + // The above write should have buffered the written data and not have + // transferred it to the underlying stream. But, ftell operation should + // still return the correct effective offset. + ASSERT_EQ(size_t(__llvm_libc::ftell(file)), WRITE_SIZE); + + long offset = 5; + ASSERT_EQ(0, __llvm_libc::fseek(file, offset, SEEK_SET)); + ASSERT_EQ(__llvm_libc::ftell(file), offset); + ASSERT_EQ(0, __llvm_libc::fseek(file, -offset, SEEK_END)); + ASSERT_EQ(size_t(__llvm_libc::ftell(file)), size_t(WRITE_SIZE - offset)); + + ASSERT_EQ(0, __llvm_libc::fseek(file, 0, SEEK_SET)); + constexpr size_t READ_SIZE = WRITE_SIZE / 2; + char data[READ_SIZE]; + // Reading a small amount will actually read out much more data and + // buffer it. But, ftell should return the correct effective offset.
+ ASSERT_EQ(READ_SIZE, __llvm_libc::fread(data, 1, READ_SIZE, file)); + ASSERT_EQ(size_t(__llvm_libc::ftell(file)), READ_SIZE); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); + } +}; + +TEST_F(LlvmLibcFTellTest, TellWithFBF) { test_with_bufmode(_IOFBF); } + +TEST_F(LlvmLibcFTellTest, TellWithNBF) { test_with_bufmode(_IONBF); } + +TEST_F(LlvmLibcFTellTest, TellWithLBF) { test_with_bufmode(_IOLBF); } diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp index 0684ebc8d444d..3ae8bf47c8909 100644 --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -102,6 +102,19 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArg) { ASSERT_PFORMAT_EQ(expected, format_arr[0]); } +TEST(LlvmLibcPrintfParserTest, EvalBadArg) { + __llvm_libc::printf_core::FormatSection format_arr[10]; + const char *str = "%\0abc"; + int arg1 = 12345; + evaluate(format_arr, str, arg1); + + __llvm_libc::printf_core::FormatSection expected; + expected.has_conv = false; + expected.raw_string = {str, 1}; + + ASSERT_PFORMAT_EQ(expected, format_arr[0]); +} + TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFlags) { __llvm_libc::printf_core::FormatSection format_arr[10]; const char *str = "%+-0 #d"; diff --git a/libc/test/src/stdio/scanf_core/CMakeLists.txt b/libc/test/src/stdio/scanf_core/CMakeLists.txt index 3235a0e53e010..db20335a5c943 100644 --- a/libc/test/src/stdio/scanf_core/CMakeLists.txt +++ b/libc/test/src/stdio/scanf_core/CMakeLists.txt @@ -12,3 +12,34 @@ add_libc_unittest( libc.src.__support.CPP.string_view libc.src.__support.arg_list ) + +if(NOT (TARGET libc.src.__support.File.file)) + # Not all platforms have a file implementation. If file is unavailable, + # then we must skip all the parts that need file.
+ return() +endif() + +add_libc_unittest( + string_reader_test + SUITE + libc_stdio_unittests + SRCS + string_reader_test.cpp + DEPENDS + libc.src.stdio.scanf_core.reader + libc.src.stdio.scanf_core.string_reader + libc.src.__support.CPP.string_view +) + +add_libc_unittest( + converter_test + SUITE + libc_stdio_unittests + SRCS + converter_test.cpp + DEPENDS + libc.src.stdio.scanf_core.reader + libc.src.stdio.scanf_core.string_reader + libc.src.stdio.scanf_core.converter + libc.src.__support.CPP.string_view +) diff --git a/libc/test/src/stdio/scanf_core/converter_test.cpp b/libc/test/src/stdio/scanf_core/converter_test.cpp new file mode 100644 index 0000000000000..d90af34ff1979 --- /dev/null +++ b/libc/test/src/stdio/scanf_core/converter_test.cpp @@ -0,0 +1,295 @@ +//===-- Unittests for the basic scanf converters --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/string_view.h" +#include "src/stdio/scanf_core/converter.h" +#include "src/stdio/scanf_core/core_structs.h" +#include "src/stdio/scanf_core/reader.h" +#include "src/stdio/scanf_core/string_reader.h" + +#include "utils/UnitTest/Test.h" + +TEST(LlvmLibcScanfConverterTest, RawMatchBasic) { + const char *str = "abcdef"; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + // Reading "abc" should succeed. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "abc"), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(3)); + + // Reading nothing should succeed and not advance. 
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, ""), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(3)); + + // Reading a space where there is none should succeed and not advance. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " "), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(3)); + + // Reading "d" should succeed and advance by 1. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "d"), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(4)); + + // Reading "z" should fail and not advance. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "z"), + static_cast(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(4)); + + // Reading "efgh" should fail but advance to the end. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "efgh"), + static_cast(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(6)); +} + +TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) { + const char *str = " a \t\n b cd"; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + // Reading "a" should fail and not advance. + // Since there's nothing in the format string (the second argument to + // raw_match) to match the space in the buffer it isn't consumed. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "a"), + static_cast(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(0)); + + // Reading " \t\n " should succeed and advance past the space. + // Any number of space characters in the format string match 0 or more space + // characters in the buffer. 
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " \t\n "), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(1)); + + // Reading "ab" should fail and only advance past the a + // The a characters match, but the format string doesn't have anything to + // consume the spaces in the buffer, so it fails. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "ab"), + static_cast(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(2)); + + // Reading " b" should succeed and advance past the b + // Any number of space characters in the format string matches 0 or more space + // characters in the buffer. + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " b"), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(7)); + + // Reading "\t" should succeed and advance past the spaces to the c + ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "\t"), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(10)); + + // Reading "c d" should succeed and advance past the d. + // Here the space character in the format string is matching 0 space + // characters in the buffer. 
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "c d"), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(12)); +} + +TEST(LlvmLibcScanfConverterTest, StringConvSimple) { + const char *str = "abcDEF123 654LKJihg"; + char result[20]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = 's'; + conv.output_ptr = result; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(9)); + ASSERT_STREQ(result, "abcDEF123"); + + //%s skips all spaces before beginning to read. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(19)); + ASSERT_STREQ(result, "654LKJihg"); +} + +TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) { + const char *str = "abcDEF123 654LKJihg"; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = 's'; + conv.flags = __llvm_libc::scanf_core::NO_WRITE; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(9)); + + //%s skips all spaces before beginning to read. 
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(19)); +} + +TEST(LlvmLibcScanfConverterTest, StringConvWidth) { + const char *str = "abcDEF123 654LKJihg"; + char result[6]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = 's'; + conv.max_width = 5; // this means the result takes up 6 characters (with \0). + conv.output_ptr = result; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(5)); + ASSERT_STREQ(result, "abcDE"); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(9)); + ASSERT_STREQ(result, "F123"); + + //%s skips all spaces before beginning to read. 
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(15)); + ASSERT_STREQ(result, "654LK"); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(19)); + ASSERT_STREQ(result, "Jihg"); +} + +TEST(LlvmLibcScanfConverterTest, CharsConv) { + const char *str = "abcDEF123 654LKJihg MNOpqr&*("; + char result[20]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = 'c'; + conv.output_ptr = result; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(1)); + ASSERT_EQ(result[0], 'a'); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(2)); + ASSERT_EQ(result[0], 'b'); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(3)); + ASSERT_EQ(result[0], 'c'); + + // Switch from character by character to 8 at a time. + conv.max_width = 8; + __llvm_libc::cpp::string_view result_view(result, 8); + + //%c doesn't stop on spaces. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(11)); + ASSERT_EQ(result_view, __llvm_libc::cpp::string_view("DEF123 6", 8)); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(19)); + ASSERT_EQ(result_view, __llvm_libc::cpp::string_view("54LKJihg", 8)); + + //%c also doesn't skip spaces at the start. 
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(27)); + ASSERT_EQ(result_view, __llvm_libc::cpp::string_view(" MNOpqr&", 8)); + + //%c will stop on a null byte though. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(29)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 2), + __llvm_libc::cpp::string_view("*(", 2)); +} + +TEST(LlvmLibcScanfConverterTest, ScansetConv) { + const char *str = "abcDEF[123] 654LKJihg"; + char result[20]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + __llvm_libc::scanf_core::FormatSection conv; + conv.has_conv = true; + conv.conv_name = '['; + conv.output_ptr = result; + + __llvm_libc::cpp::bitset<256> bitset1; + bitset1.set_range('a', 'c'); + bitset1.set_range('D', 'F'); + bitset1.set_range('1', '6'); + bitset1.set('['); + bitset1.set(']'); + + conv.scan_set = bitset1; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(11)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 11), + __llvm_libc::cpp::string_view("abcDEF[123]", 11)); + + // The scanset conversion doesn't consume leading spaces. If it did it would + // return "654" here. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::MATCHING_FAILURE)); + ASSERT_EQ(reader.chars_read(), size_t(11)); + + // This set is everything except for a-g. 
+ __llvm_libc::cpp::bitset<256> bitset2; + bitset2.set_range('a', 'g'); + bitset2.flip(); + conv.scan_set = bitset2; + + conv.max_width = 5; + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(16)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 5), + __llvm_libc::cpp::string_view(" 654L", 5)); + + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(20)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 4), + __llvm_libc::cpp::string_view("KJih", 4)); + + // This set is g and '\0'. + __llvm_libc::cpp::bitset<256> bitset3; + bitset3.set('g'); + bitset3.set('\0'); + conv.scan_set = bitset3; + + // Even though '\0' is in the scanset, it should still stop on it. + ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv), + static_cast(__llvm_libc::scanf_core::READ_OK)); + ASSERT_EQ(reader.chars_read(), size_t(21)); + ASSERT_EQ(__llvm_libc::cpp::string_view(result, 1), + __llvm_libc::cpp::string_view("g", 1)); +} diff --git a/libc/test/src/stdio/scanf_core/parser_test.cpp b/libc/test/src/stdio/scanf_core/parser_test.cpp index e2ed4b026e5d0..3d2c0817e880f 100644 --- a/libc/test/src/stdio/scanf_core/parser_test.cpp +++ b/libc/test/src/stdio/scanf_core/parser_test.cpp @@ -103,6 +103,19 @@ TEST(LlvmLibcScanfParserTest, EvalOneArg) { ASSERT_SFORMAT_EQ(expected, format_arr[0]); } +TEST(LlvmLibcScanfParserTest, EvalBadArg) { + __llvm_libc::scanf_core::FormatSection format_arr[10]; + const char *str = "%\0abc"; + int arg1 = 12345; + evaluate(format_arr, str, &arg1); + + __llvm_libc::scanf_core::FormatSection expected; + expected.has_conv = false; + expected.raw_string = {str, 1}; + + ASSERT_SFORMAT_EQ(expected, format_arr[0]); +} + TEST(LlvmLibcScanfParserTest, EvalOneArgWithFlag) { __llvm_libc::scanf_core::FormatSection format_arr[10]; const char *str = "%*d"; diff --git 
a/libc/test/src/stdio/scanf_core/string_reader_test.cpp b/libc/test/src/stdio/scanf_core/string_reader_test.cpp new file mode 100644 index 0000000000000..4331d488be06a --- /dev/null +++ b/libc/test/src/stdio/scanf_core/string_reader_test.cpp @@ -0,0 +1,66 @@ +//===-- Unittests for the scanf String Reader -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/CPP/string_view.h" +#include "src/stdio/scanf_core/reader.h" +#include "src/stdio/scanf_core/string_reader.h" + +#include "utils/UnitTest/Test.h" + +TEST(LlvmLibcScanfStringReaderTest, Constructor) { + char str[10]; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); +} + +TEST(LlvmLibcScanfStringReaderTest, SimpleRead) { + const char *str = "abc"; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + for (size_t i = 0; i < sizeof("abc"); ++i) { + ASSERT_EQ(str[i], reader.getc()); + } +} + +TEST(LlvmLibcScanfStringReaderTest, ReadAndReverse) { + const char *str = "abcDEF123"; + __llvm_libc::scanf_core::StringReader str_reader(str); + __llvm_libc::scanf_core::Reader reader(&str_reader); + + for (size_t i = 0; i < 5; ++i) { + ASSERT_EQ(str[i], reader.getc()); + } + + // Move back by 3, cursor should now be on 2 + reader.ungetc(str[4]); + reader.ungetc(str[3]); + reader.ungetc(str[2]); + + for (size_t i = 2; i < 7; ++i) { + ASSERT_EQ(str[i], reader.getc()); + } + + // Move back by 2, cursor should now be on 5 + reader.ungetc(str[6]); + reader.ungetc(str[5]); + + for (size_t i = 5; i < 10; ++i) { + ASSERT_EQ(str[i], reader.getc()); + } + + // Move back by 10, which should be back to the start. 
+ for (size_t i = 0; i < 10; ++i) { + reader.ungetc(str[9 - i]); + } + + // Check the whole string. + for (size_t i = 0; i < sizeof("abcDEF123"); ++i) { + ASSERT_EQ(str[i], reader.getc()); + } +} diff --git a/libc/test/src/stdio/setbuf_test.cpp b/libc/test/src/stdio/setbuf_test.cpp new file mode 100644 index 0000000000000..0a53e221cf425 --- /dev/null +++ b/libc/test/src/stdio/setbuf_test.cpp @@ -0,0 +1,68 @@ +//===-- Unittests for setbuf ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/setbuf.h" +#include "src/stdio/ungetc.h" +#include "utils/UnitTest/Test.h" + +#include + +TEST(LlvmLibcSetbufTest, DefaultBufsize) { + // The idea in this test is to change the buffer after opening a file and + // ensure that read and write work as expected. 
+ constexpr char FILENAME[] = "testdata/setbuf_test_default_bufsize.test"; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(file == nullptr); + char buffer[BUFSIZ]; + __llvm_libc::setbuf(file, buffer); + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file)); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(FILENAME, "r"); + __llvm_libc::setbuf(file, buffer); + ASSERT_FALSE(file == nullptr); + char data[CONTENT_SIZE]; + ASSERT_EQ(__llvm_libc::fread(&data, 1, CONTENT_SIZE, file), CONTENT_SIZE); + ASSERT_STREQ(CONTENT, data); + ASSERT_EQ(0, __llvm_libc::fclose(file)); +} + +TEST(LlvmLibcSetbufTest, NullBuffer) { + // The idea in this test is that we set a null buffer and ensure that + // everything works correctly. + constexpr char FILENAME[] = "testdata/setbuf_test_null_buffer.test"; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(file == nullptr); + __llvm_libc::setbuf(file, nullptr); + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file)); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(FILENAME, "r"); + __llvm_libc::setbuf(file, nullptr); + ASSERT_FALSE(file == nullptr); + char data[CONTENT_SIZE]; + ASSERT_EQ(__llvm_libc::fread(&data, 1, CONTENT_SIZE, file), CONTENT_SIZE); + ASSERT_STREQ(CONTENT, data); + + // Ensure that ungetc also works. 
+ char unget_char = 'z'; + ASSERT_EQ(int(unget_char), __llvm_libc::ungetc(unget_char, file)); + char c; + ASSERT_EQ(__llvm_libc::fread(&c, 1, 1, file), size_t(1)); + ASSERT_EQ(c, unget_char); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); +} diff --git a/libc/test/src/stdio/setvbuf_test.cpp b/libc/test/src/stdio/setvbuf_test.cpp new file mode 100644 index 0000000000000..3cdcc044c38e1 --- /dev/null +++ b/libc/test/src/stdio/setvbuf_test.cpp @@ -0,0 +1,106 @@ +//===-- Unittests for setvbuf ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/setvbuf.h" +#include "utils/UnitTest/Test.h" + +#include +#include + +TEST(LlvmLibcSetvbufTest, SetNBFBuffer) { + // The idea in this test is that we open a file for writing and reading, and + // then set a NBF buffer to the write handle. Since it is NBF, the data + // written using the write handle should be immediately readable by the read + // handle. 
+ constexpr char FILENAME[] = "testdata/setvbuf_nbf.test"; + + ::FILE *fw = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(fw == nullptr); + char buffer[BUFSIZ]; + ASSERT_EQ(__llvm_libc::setvbuf(fw, buffer, _IONBF, BUFSIZ), 0); + + ::FILE *fr = __llvm_libc::fopen(FILENAME, "r"); + ASSERT_FALSE(fr == nullptr); + + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + for (size_t i = 0; i < CONTENT_SIZE; ++i) { + ASSERT_EQ(size_t(1), __llvm_libc::fwrite(CONTENT + i, 1, 1, fw)); + char c; + ASSERT_EQ(size_t(1), __llvm_libc::fread(&c, 1, 1, fr)); + ASSERT_EQ(c, CONTENT[i]); + } + + ASSERT_EQ(0, __llvm_libc::fclose(fw)); + ASSERT_EQ(0, __llvm_libc::fclose(fr)); + + // Make sure NBF buffer has no effect for reading. + fr = __llvm_libc::fopen(FILENAME, "r"); + char data[CONTENT_SIZE]; + ASSERT_EQ(__llvm_libc::setvbuf(fr, buffer, _IONBF, BUFSIZ), 0); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, fr)); + ASSERT_STREQ(CONTENT, data); + ASSERT_EQ(0, __llvm_libc::fclose(fr)); +} + +TEST(LlvmLibcSetvbufTest, SetLBFBuffer) { + // The idea in this test is that we open a file for writing and reading, and + // then set a LBF buffer to the write handle. Since it is LBF, the data + // written using the write handle should be available right after a '\n' is + // written. + constexpr char FILENAME[] = "testdata/setvbuf_lbf.test"; + + ::FILE *fw = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(fw == nullptr); + char buffer[BUFSIZ]; + ASSERT_EQ(__llvm_libc::setvbuf(fw, buffer, _IOLBF, BUFSIZ), 0); + + ::FILE *fr = __llvm_libc::fopen(FILENAME, "r"); + ASSERT_FALSE(fr == nullptr); + + constexpr char CONTENT[] = "abcdef\n"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, fw)); + + // Note that CONTENT_SIZE worth of data written also includes the + // null-terminator '\0'. 
But, since it is after the new line character, + // it should not be available for reading. + char data[CONTENT_SIZE]; + ASSERT_EQ(CONTENT_SIZE - 1, __llvm_libc::fread(data, 1, CONTENT_SIZE, fr)); + char c; + ASSERT_EQ(size_t(0), __llvm_libc::fread(&c, 1, 1, fr)); + + data[CONTENT_SIZE - 1] = '\0'; + ASSERT_STREQ(CONTENT, data); + + ASSERT_EQ(0, __llvm_libc::fclose(fw)); + ASSERT_EQ(0, __llvm_libc::fclose(fr)); + + // Make sure LBF buffer has no effect for reading. + fr = __llvm_libc::fopen(FILENAME, "r"); + ASSERT_EQ(__llvm_libc::setvbuf(fr, buffer, _IOLBF, BUFSIZ), 0); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, fr)); + ASSERT_STREQ(CONTENT, data); + ASSERT_EQ(0, __llvm_libc::fclose(fr)); +} + +TEST(LlvmLibcSetbufTest, InvalidBufferMode) { + constexpr char FILENAME[] = "testdata/setvbuf_invalid_bufmode.test"; + ::FILE *f = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(f == nullptr); + char buf[BUFSIZ]; + ASSERT_NE(__llvm_libc::setvbuf(f, buf, _IOFBF + _IOLBF + _IONBF, BUFSIZ), 0); + ASSERT_EQ(errno, EINVAL); + + errno = 0; + ASSERT_EQ(0, __llvm_libc::fclose(f)); +} diff --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp new file mode 100644 index 0000000000000..0102be7b2e0fb --- /dev/null +++ b/libc/test/src/stdio/ungetc_test.cpp @@ -0,0 +1,59 @@ +//===-- Unittests for ungetc ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fseek.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/ungetc.h" +#include "utils/UnitTest/Test.h" + +#include + +TEST(LlvmLibcUngetcTest, UngetAndReadBack) { + constexpr char FILENAME[] = "testdata/ungetc_test.test"; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(file == nullptr); + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file)); + // Cannot unget to an un-readable file. + ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file)); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(FILENAME, "r+"); + ASSERT_FALSE(file == nullptr); + char c; + ASSERT_EQ(__llvm_libc::fread(&c, 1, 1, file), size_t(1)); + ASSERT_EQ(c, CONTENT[0]); + ASSERT_EQ(__llvm_libc::ungetc(int(c), file), int(c)); + + char data[CONTENT_SIZE]; + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, file)); + ASSERT_STREQ(CONTENT, data); + + ASSERT_EQ(0, __llvm_libc::fseek(file, 0, SEEK_SET)); + // ungetc should not fail after a seek operation. + int unget_char = 'z'; + ASSERT_EQ(unget_char, __llvm_libc::ungetc(unget_char, file)); + // Another unget should fail. + ASSERT_EQ(EOF, __llvm_libc::ungetc(unget_char, file)); + // ungetting a char at the beginning of the file will allow us to fetch + // one additional character. + char new_data[CONTENT_SIZE + 1]; + ASSERT_EQ(CONTENT_SIZE + 1, + __llvm_libc::fread(new_data, 1, CONTENT_SIZE + 1, file)); + ASSERT_STREQ("zabcdef", new_data); + + ASSERT_EQ(size_t(1), __llvm_libc::fwrite("x", 1, 1, file)); + // unget should fail after a write operation. 
+ ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file)); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); +} diff --git a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp index cd1e61b2f35a1..06f621052f154 100644 --- a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp +++ b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp @@ -22,7 +22,7 @@ llvm::cl::list } // anonymous namespace bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { - OS << "#include \"type_traits.h\"\n"; + OS << "#include \"src/__support/CPP/type_traits.h\"\n"; llvm_libc::APIIndexer G(records); std::unordered_set headerFileSet; for (const auto &entrypoint : EntrypointNamesOption) { @@ -45,7 +45,7 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { OS << '\n'; - OS << "int main() {\n"; + OS << "extern \"C\" int main() {\n"; for (const auto &entrypoint : EntrypointNamesOption) { auto match = G.FunctionSpecMap.find(entrypoint); if (match == G.FunctionSpecMap.end()) { @@ -94,6 +94,7 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { // We provide dummy malloc and free implementations to support the case // when LLVM libc does to include them. 
OS << "void *malloc(size_t) { return nullptr; }\n"; + OS << "void *realloc(void *, size_t) { return nullptr; }\n"; OS << "void free(void *) {}\n"; return false; diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake index a5ce4745a5f6a..3bae536436835 100644 --- a/libcxx/cmake/config-ix.cmake +++ b/libcxx/cmake/config-ix.cmake @@ -98,7 +98,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() endif() diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst index d10fdde8d719f..abcebeb01ebee 100644 --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -46,6 +46,7 @@ Implemented Papers ``from_chars`` for Integral Types in ```` Header - P0220R1 - Adopt Library Fundamentals V1 TS Components for C++17 - P0482R6 - char8_t: A type for UTF-8 characters and strings +- P2438R2 - ``std::string::substr() &&`` Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv index 7017c31a92c41..f40cca9c7ac60 100644 --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -71,7 +71,7 @@ "`P2408R5 `__","LWG","Ranges iterators as inputs to non-Ranges algorithms","July 2022","","" "`P2417R2 `__","LWG","A more ``constexpr`` ``bitset``","July 2022","|Complete|","16.0" "`P2419R2 `__","LWG","Clarify handling of encodings in localized formatting of chrono types","July 2022","","" -"`P2438R2 `__","LWG","``std::string::substr() &&``","July 2022","","" +"`P2438R2 `__","LWG","``std::string::substr() &&``","July 2022","|Complete|","16.0" "`P2445R1 `__","LWG","``forward_like``","July 2022","|Complete|","16.0" "`P2446R2 `__","LWG","``views::as_rvalue``","July 2022","","" "`P2460R2 `__","LWG","Relax 
requirements on ``wchar_t`` to match existing practices","July 2022","","" diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst index 59a1e4b982f97..e6425d8c7c8b4 100644 --- a/libcxx/docs/UsingLibcxx.rst +++ b/libcxx/docs/UsingLibcxx.rst @@ -393,6 +393,44 @@ which no dialect declares as such (See the second form described above). * ``search`` * ``unique`` * ``upper_bound`` +* ``ranges::adjacent_find`` +* ``ranges::all_of`` +* ``ranges::any_of`` +* ``ranges::binary_search`` +* ``ranges::clamp`` +* ``ranges::count_if`` +* ``ranges::count`` +* ``ranges::equal_range`` +* ``ranges::equal`` +* ``ranges::find_end`` +* ``ranges::find_first_of`` +* ``ranges::find_if_not`` +* ``ranges::find_if`` +* ``ranges::find`` +* ``ranges::get_temporary_buffer`` +* ``ranges::includes`` +* ``ranges::is_heap_until`` +* ``ranges::is_heap`` +* ``ranges::is_partitioned`` +* ``ranges::is_permutation`` +* ``ranges::is_sorted_until`` +* ``ranges::is_sorted`` +* ``ranges::lexicographical_compare`` +* ``ranges::lower_bound`` +* ``ranges::max_element`` +* ``ranges::max`` +* ``ranges::min_element`` +* ``ranges::min`` +* ``ranges::minmax_element`` +* ``ranges::minmax`` +* ``ranges::mismatch`` +* ``ranges::none_of`` +* ``ranges::remove_if`` +* ``ranges::remove`` +* ``ranges::search_n`` +* ``ranges::search`` +* ``ranges::unique`` +* ``ranges::upper_bound`` * ``lock_guard``'s constructors * ``as_const`` * ``bit_cast`` diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 7603b37c9d5c0..4310cfdc50308 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -396,6 +396,7 @@ set(files __memory/allocation_guard.h __memory/allocator.h __memory/allocator_arg_t.h + __memory/allocator_destructor.h __memory/allocator_traits.h __memory/assume_aligned.h __memory/auto_ptr.h diff --git a/libcxx/include/__algorithm/ranges_adjacent_find.h b/libcxx/include/__algorithm/ranges_adjacent_find.h index 4ed306baf474d..d338d13e6eee9 100644 --- 
a/libcxx/include/__algorithm/ranges_adjacent_find.h +++ b/libcxx/include/__algorithm/ranges_adjacent_find.h @@ -50,7 +50,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_binary_predicate, projected<_Iter, _Proj>> _Pred = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { return __adjacent_find_impl(std::move(__first), std::move(__last), __pred, __proj); } @@ -59,7 +59,7 @@ struct __fn { class _Proj = identity, indirect_binary_predicate, _Proj>, projected, _Proj>> _Pred = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __range, _Pred __pred = {}, _Proj __proj = {}) const { return __adjacent_find_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_all_of.h b/libcxx/include/__algorithm/ranges_all_of.h index f73d069b99c2d..e45c4e5843790 100644 --- a/libcxx/include/__algorithm/ranges_all_of.h +++ b/libcxx/include/__algorithm/ranges_all_of.h @@ -42,14 +42,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { return __all_of_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __all_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_any_of.h b/libcxx/include/__algorithm/ranges_any_of.h index 53627ed5c2e57..e7d1e723a70f4 100644 --- 
a/libcxx/include/__algorithm/ranges_any_of.h +++ b/libcxx/include/__algorithm/ranges_any_of.h @@ -42,14 +42,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { return __any_of_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __any_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_binary_search.h b/libcxx/include/__algorithm/ranges_binary_search.h index 39a9a02e44dd2..b2a8977652fb9 100644 --- a/libcxx/include/__algorithm/ranges_binary_search.h +++ b/libcxx/include/__algorithm/ranges_binary_search.h @@ -33,7 +33,7 @@ namespace __binary_search { struct __fn { template _Sent, class _Type, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__lower_bound_impl<_RangeAlgPolicy>(__first, __last, __value, __comp, __proj); return __ret != __last && !std::invoke(__comp, __value, std::invoke(__proj, *__first)); @@ -41,7 +41,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); diff --git a/libcxx/include/__algorithm/ranges_clamp.h b/libcxx/include/__algorithm/ranges_clamp.h index 
1bb3a5a2ee674..09a97fc790eac 100644 --- a/libcxx/include/__algorithm/ranges_clamp.h +++ b/libcxx/include/__algorithm/ranges_clamp.h @@ -33,7 +33,7 @@ struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr const _Type& operator()(const _Type& __value, const _Type& __low, const _Type& __high, diff --git a/libcxx/include/__algorithm/ranges_count.h b/libcxx/include/__algorithm/ranges_count.h index f790c994f8be2..527dd0620085f 100644 --- a/libcxx/include/__algorithm/ranges_count.h +++ b/libcxx/include/__algorithm/ranges_count.h @@ -34,7 +34,7 @@ namespace __count { struct __fn { template _Sent, class _Type, class _Proj = identity> requires indirect_binary_predicate, const _Type*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter> operator()(_Iter __first, _Sent __last, const _Type& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __e) { return __e == __value; }; return ranges::__count_if_impl(std::move(__first), std::move(__last), __pred, __proj); @@ -42,7 +42,7 @@ struct __fn { template requires indirect_binary_predicate, _Proj>, const _Type*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_difference_t<_Range> operator()(_Range&& __r, const _Type& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __e) { return __e == __value; }; return ranges::__count_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); diff --git a/libcxx/include/__algorithm/ranges_count_if.h b/libcxx/include/__algorithm/ranges_count_if.h index 8cf1d026804c1..931618b7b545b 100644 --- a/libcxx/include/__algorithm/ranges_count_if.h +++ b/libcxx/include/__algorithm/ranges_count_if.h @@ -46,14 +46,14 @@ namespace __count_if { struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Predicate> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT 
_LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter> operator()(_Iter __first, _Sent __last, _Predicate __pred, _Proj __proj = {}) const { return ranges::__count_if_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Predicate> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_difference_t<_Range> operator()(_Range&& __r, _Predicate __pred, _Proj __proj = {}) const { return ranges::__count_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_equal.h b/libcxx/include/__algorithm/ranges_equal.h index f7424ffd52ad1..3c417f09de902 100644 --- a/libcxx/include/__algorithm/ranges_equal.h +++ b/libcxx/include/__algorithm/ranges_equal.h @@ -60,7 +60,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, @@ -83,7 +83,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, diff --git a/libcxx/include/__algorithm/ranges_equal_range.h b/libcxx/include/__algorithm/ranges_equal_range.h index efe5b2f4193bf..94dc058e7bc15 100644 --- a/libcxx/include/__algorithm/ranges_equal_range.h +++ b/libcxx/include/__algorithm/ranges_equal_range.h @@ -44,7 +44,7 @@ struct __fn { class _Tp, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, const _Tp& 
__value, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__equal_range<_RangeAlgPolicy>( std::move(__first), std::move(__last), __value, __comp, __proj); @@ -56,7 +56,7 @@ struct __fn { class _Tp, class _Proj = identity, indirect_strict_weak_order, _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, const _Tp& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__equal_range<_RangeAlgPolicy>( ranges::begin(__range), ranges::end(__range), __value, __comp, __proj); diff --git a/libcxx/include/__algorithm/ranges_find.h b/libcxx/include/__algorithm/ranges_find.h index 1681eb1876779..580c2a14c9ed9 100644 --- a/libcxx/include/__algorithm/ranges_find.h +++ b/libcxx/include/__algorithm/ranges_find.h @@ -35,7 +35,7 @@ namespace __find { struct __fn { template _Sp, class _Tp, class _Proj = identity> requires indirect_binary_predicate, const _Tp*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, const _Tp& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __e) { return std::forward(__e) == __value; }; return ranges::__find_if_impl(std::move(__first), std::move(__last), __pred, __proj); @@ -43,7 +43,7 @@ struct __fn { template requires indirect_binary_predicate, _Proj>, const _Tp*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, const _Tp& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __e) { return std::forward(__e) == __value; }; return ranges::__find_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); diff --git a/libcxx/include/__algorithm/ranges_find_end.h b/libcxx/include/__algorithm/ranges_find_end.h index df891000b526f..ea36f4d4e6e7e 100644 --- 
a/libcxx/include/__algorithm/ranges_find_end.h +++ b/libcxx/include/__algorithm/ranges_find_end.h @@ -40,7 +40,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter1> operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, @@ -65,7 +65,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range1> operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, diff --git a/libcxx/include/__algorithm/ranges_find_first_of.h b/libcxx/include/__algorithm/ranges_find_first_of.h index 44221c1d5f8cb..9d66e7511c0fb 100644 --- a/libcxx/include/__algorithm/ranges_find_first_of.h +++ b/libcxx/include/__algorithm/ranges_find_first_of.h @@ -54,7 +54,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter1 operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, @@ -73,7 +73,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range1> operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, diff --git a/libcxx/include/__algorithm/ranges_find_if.h b/libcxx/include/__algorithm/ranges_find_if.h index b3f450e79be52..45ce6e460d685 100644 --- a/libcxx/include/__algorithm/ranges_find_if.h +++ 
b/libcxx/include/__algorithm/ranges_find_if.h @@ -45,14 +45,14 @@ struct __fn { template _Sp, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Pred __pred, _Proj __proj = {}) const { return ranges::__find_if_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Pred __pred, _Proj __proj = {}) const { return ranges::__find_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_find_if_not.h b/libcxx/include/__algorithm/ranges_find_if_not.h index ffd42ed21d912..3dd12132754bf 100644 --- a/libcxx/include/__algorithm/ranges_find_if_not.h +++ b/libcxx/include/__algorithm/ranges_find_if_not.h @@ -35,7 +35,7 @@ namespace __find_if_not { struct __fn { template _Sp, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Pred __pred, _Proj __proj = {}) const { auto __pred2 = [&](auto&& __e) { return !std::invoke(__pred, std::forward(__e)); }; return ranges::__find_if_impl(std::move(__first), std::move(__last), __pred2, __proj); @@ -43,7 +43,7 @@ struct __fn { template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Pred __pred, _Proj __proj = {}) const { auto __pred2 = [&](auto&& __e) { return !std::invoke(__pred, std::forward(__e)); }; return ranges::__find_if_impl(ranges::begin(__r), ranges::end(__r), __pred2, __proj); diff --git a/libcxx/include/__algorithm/ranges_includes.h b/libcxx/include/__algorithm/ranges_includes.h index 26cd8d8b8dfb6..8438117cfa808 100644 --- 
a/libcxx/include/__algorithm/ranges_includes.h +++ b/libcxx/include/__algorithm/ranges_includes.h @@ -43,7 +43,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity, indirect_strict_weak_order, projected<_Iter2, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, @@ -68,7 +68,7 @@ struct __fn { class _Proj2 = identity, indirect_strict_weak_order, _Proj1>, projected, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( _Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return std::__includes( ranges::begin(__range1), diff --git a/libcxx/include/__algorithm/ranges_is_heap.h b/libcxx/include/__algorithm/ranges_is_heap.h index 6b1193178028b..a16c075b0763f 100644 --- a/libcxx/include/__algorithm/ranges_is_heap.h +++ b/libcxx/include/__algorithm/ranges_is_heap.h @@ -47,14 +47,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return __is_heap_fn_impl(std::move(__first), std::move(__last), __comp, __proj); } template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { return __is_heap_fn_impl(ranges::begin(__range), ranges::end(__range), __comp, __proj); } diff --git a/libcxx/include/__algorithm/ranges_is_heap_until.h b/libcxx/include/__algorithm/ranges_is_heap_until.h index 2ca0d06f6e30a..8c8dac5bc9099 100644 --- a/libcxx/include/__algorithm/ranges_is_heap_until.h +++ 
b/libcxx/include/__algorithm/ranges_is_heap_until.h @@ -47,14 +47,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return __is_heap_until_fn_impl(std::move(__first), std::move(__last), __comp, __proj); } template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { return __is_heap_until_fn_impl(ranges::begin(__range), ranges::end(__range), __comp, __proj); } diff --git a/libcxx/include/__algorithm/ranges_is_partitioned.h b/libcxx/include/__algorithm/ranges_is_partitioned.h index ce56378303ebf..b903953d61658 100644 --- a/libcxx/include/__algorithm/ranges_is_partitioned.h +++ b/libcxx/include/__algorithm/ranges_is_partitioned.h @@ -54,7 +54,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { return __is_parititioned_impl(std::move(__first), std::move(__last), __pred, __proj); } @@ -62,7 +62,7 @@ struct __fn { template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __is_parititioned_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_is_permutation.h b/libcxx/include/__algorithm/ranges_is_permutation.h index afee6b5573e76..b617500ea0d87 100644 --- a/libcxx/include/__algorithm/ranges_is_permutation.h +++ 
b/libcxx/include/__algorithm/ranges_is_permutation.h @@ -49,7 +49,7 @@ struct __fn { class _Proj2 = identity, indirect_equivalence_relation, projected<_Iter2, _Proj2>> _Pred = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __is_permutation_func_impl( @@ -62,7 +62,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity, indirect_equivalence_relation, _Proj1>, projected, _Proj2>> _Pred = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { if constexpr (sized_range<_Range1> && sized_range<_Range2>) { diff --git a/libcxx/include/__algorithm/ranges_is_sorted.h b/libcxx/include/__algorithm/ranges_is_sorted.h index e3550569af289..ce3032ff226e1 100644 --- a/libcxx/include/__algorithm/ranges_is_sorted.h +++ b/libcxx/include/__algorithm/ranges_is_sorted.h @@ -33,7 +33,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__is_sorted_until_impl(std::move(__first), __last, __comp, __proj) == __last; } @@ -41,7 +41,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { auto __last = ranges::end(__range); return ranges::__is_sorted_until_impl(ranges::begin(__range), __last, __comp, __proj) == __last; diff --git 
a/libcxx/include/__algorithm/ranges_is_sorted_until.h b/libcxx/include/__algorithm/ranges_is_sorted_until.h index 47e98b8f452ec..17fc42e97fd30 100644 --- a/libcxx/include/__algorithm/ranges_is_sorted_until.h +++ b/libcxx/include/__algorithm/ranges_is_sorted_until.h @@ -49,7 +49,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__is_sorted_until_impl(std::move(__first), std::move(__last), __comp, __proj); } @@ -57,7 +57,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__is_sorted_until_impl(ranges::begin(__range), ranges::end(__range), __comp, __proj); } diff --git a/libcxx/include/__algorithm/ranges_lexicographical_compare.h b/libcxx/include/__algorithm/ranges_lexicographical_compare.h index eab7bbe3e0d11..2972e327169da 100644 --- a/libcxx/include/__algorithm/ranges_lexicographical_compare.h +++ b/libcxx/include/__algorithm/ranges_lexicographical_compare.h @@ -55,7 +55,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity, indirect_strict_weak_order, projected<_Iter2, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp __comp = {}, @@ -74,7 +74,7 @@ struct __fn { class _Proj2 = identity, indirect_strict_weak_order, _Proj1>, projected, _Proj2>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, 
_Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __lexicographical_compare_impl(ranges::begin(__range1), ranges::end(__range1), ranges::begin(__range2), ranges::end(__range2), diff --git a/libcxx/include/__algorithm/ranges_lower_bound.h b/libcxx/include/__algorithm/ranges_lower_bound.h index abcbe82a443d1..78cbb6d4fb245 100644 --- a/libcxx/include/__algorithm/ranges_lower_bound.h +++ b/libcxx/include/__algorithm/ranges_lower_bound.h @@ -37,14 +37,14 @@ namespace __lower_bound { struct __fn { template _Sent, class _Type, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { return std::__lower_bound_impl<_RangeAlgPolicy>(__first, __last, __value, __comp, __proj); } template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __r, const _Type& __value, _Comp __comp = {}, diff --git a/libcxx/include/__algorithm/ranges_max.h b/libcxx/include/__algorithm/ranges_max.h index f027faa075575..55aef997698c6 100644 --- a/libcxx/include/__algorithm/ranges_max.h +++ b/libcxx/include/__algorithm/ranges_max.h @@ -39,14 +39,14 @@ namespace __max { struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator()(const _Tp& __a, const _Tp& __b, _Comp __comp = {}, _Proj __proj = {}) const { return std::invoke(__comp, std::invoke(__proj, __a), std::invoke(__proj, __b)) ? 
__b : __a; } template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Tp operator()(initializer_list<_Tp> __il, _Comp __comp = {}, _Proj __proj = {}) const { _LIBCPP_ASSERT(__il.begin() != __il.end(), "initializer_list must contain at least one element"); @@ -57,7 +57,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> requires indirectly_copyable_storable, range_value_t<_Rp>*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_value_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); diff --git a/libcxx/include/__algorithm/ranges_max_element.h b/libcxx/include/__algorithm/ranges_max_element.h index d4c3242e04eba..490f32075a4c1 100644 --- a/libcxx/include/__algorithm/ranges_max_element.h +++ b/libcxx/include/__algorithm/ranges_max_element.h @@ -33,7 +33,7 @@ namespace __max_element { struct __fn { template _Sp, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); }; return ranges::__min_element_impl(__first, __last, __comp_lhs_rhs_swapped, __proj); @@ -41,7 +41,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](auto&& __lhs, auto&& __rhs) { return std::invoke(__comp, __rhs, __lhs); }; return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp_lhs_rhs_swapped, __proj); diff --git a/libcxx/include/__algorithm/ranges_min.h 
b/libcxx/include/__algorithm/ranges_min.h index 8152a411f3832..0e31f57fb8dde 100644 --- a/libcxx/include/__algorithm/ranges_min.h +++ b/libcxx/include/__algorithm/ranges_min.h @@ -38,14 +38,14 @@ namespace __min { struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& operator()(const _Tp& __a, const _Tp& __b, _Comp __comp = {}, _Proj __proj = {}) const { return std::invoke(__comp, std::invoke(__proj, __b), std::invoke(__proj, __a)) ? __b : __a; } template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Tp operator()(initializer_list<_Tp> __il, _Comp __comp = {}, _Proj __proj = {}) const { _LIBCPP_ASSERT(__il.begin() != __il.end(), "initializer_list must contain at least one element"); return *ranges::__min_element_impl(__il.begin(), __il.end(), __comp, __proj); @@ -54,7 +54,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> requires indirectly_copyable_storable, range_value_t<_Rp>*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr range_value_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); diff --git a/libcxx/include/__algorithm/ranges_min_element.h b/libcxx/include/__algorithm/ranges_min_element.h index 66d649971465f..1751874d03bb8 100644 --- a/libcxx/include/__algorithm/ranges_min_element.h +++ b/libcxx/include/__algorithm/ranges_min_element.h @@ -48,14 +48,14 @@ namespace __min_element { struct __fn { template _Sp, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__min_element_impl(__first, __last, __comp, __proj); } template , _Proj>> _Comp = 
ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); } diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h index 377fe9b4a26d4..f82e00551e47b 100644 --- a/libcxx/include/__algorithm/ranges_minmax.h +++ b/libcxx/include/__algorithm/ranges_minmax.h @@ -45,7 +45,7 @@ namespace __minmax { struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result operator()(const _Type& __a, const _Type& __b, _Comp __comp = {}, _Proj __proj = {}) const { if (std::invoke(__comp, std::invoke(__proj, __b), std::invoke(__proj, __a))) return {__b, __a}; @@ -54,7 +54,7 @@ struct __fn { template > _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result<_Type> operator()(initializer_list<_Type> __il, _Comp __comp = {}, _Proj __proj = {}) const { _LIBCPP_ASSERT(__il.begin() != __il.end(), "initializer_list has to contain at least one element"); auto __iters = std::__minmax_element_impl(__il.begin(), __il.end(), __comp, __proj); @@ -64,7 +64,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> requires indirectly_copyable_storable, range_value_t<_Range>*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_result> operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __first = ranges::begin(__r); auto __last = ranges::end(__r); diff --git a/libcxx/include/__algorithm/ranges_minmax_element.h b/libcxx/include/__algorithm/ranges_minmax_element.h index 9d1093b648a28..6699f9626e1bf 100644 --- a/libcxx/include/__algorithm/ranges_minmax_element.h 
+++ b/libcxx/include/__algorithm/ranges_minmax_element.h @@ -42,7 +42,7 @@ namespace __minmax_element { struct __fn { template _Sp, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_element_result<_Ip> operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__minmax_element_impl(std::move(__first), std::move(__last), __comp, __proj); return {__ret.first, __ret.second}; @@ -50,7 +50,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr ranges::minmax_element_result> operator()(_Rp&& __r, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__minmax_element_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); diff --git a/libcxx/include/__algorithm/ranges_mismatch.h b/libcxx/include/__algorithm/ranges_mismatch.h index 4775daf4f7f69..4fd051792838c 100644 --- a/libcxx/include/__algorithm/ranges_mismatch.h +++ b/libcxx/include/__algorithm/ranges_mismatch.h @@ -55,7 +55,7 @@ struct __fn { input_iterator _I2, sentinel_for<_I2> _S2, class _Pred = ranges::equal_to, class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_I1, _I2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr mismatch_result<_I1, _I2> operator()(_I1 __first1, _S1 __last1, _I2 __first2, _S2 __last2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __go(std::move(__first1), __last1, std::move(__first2), __last2, __pred, __proj1, __proj2); @@ -64,7 +64,7 @@ struct __fn { template requires indirectly_comparable, iterator_t<_R2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr mismatch_result, borrowed_iterator_t<_R2>> operator()(_R1&& __r1, _R2&& 
__r2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __go(ranges::begin(__r1), ranges::end(__r1), ranges::begin(__r2), ranges::end(__r2), diff --git a/libcxx/include/__algorithm/ranges_none_of.h b/libcxx/include/__algorithm/ranges_none_of.h index d93b630bde2b5..b39e570fd33a8 100644 --- a/libcxx/include/__algorithm/ranges_none_of.h +++ b/libcxx/include/__algorithm/ranges_none_of.h @@ -42,14 +42,14 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { return __none_of_impl(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return __none_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_remove.h b/libcxx/include/__algorithm/ranges_remove.h index eb53a5db1b5aa..dd5c5fb4536ac 100644 --- a/libcxx/include/__algorithm/ranges_remove.h +++ b/libcxx/include/__algorithm/ranges_remove.h @@ -35,7 +35,7 @@ struct __fn { template _Sent, class _Type, class _Proj = identity> requires indirect_binary_predicate, const _Type*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, const _Type& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __other) { return __value == __other; }; return ranges::__remove_if_impl(std::move(__first), std::move(__last), __pred, __proj); @@ -44,7 +44,7 @@ struct __fn { template requires permutable> && indirect_binary_predicate, _Proj>, const _Type*> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr 
borrowed_subrange_t<_Range> operator()(_Range&& __range, const _Type& __value, _Proj __proj = {}) const { auto __pred = [&](auto&& __other) { return __value == __other; }; return ranges::__remove_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); diff --git a/libcxx/include/__algorithm/ranges_remove_if.h b/libcxx/include/__algorithm/ranges_remove_if.h index c2e9052d3b313..1f17467fc43eb 100644 --- a/libcxx/include/__algorithm/ranges_remove_if.h +++ b/libcxx/include/__algorithm/ranges_remove_if.h @@ -56,7 +56,7 @@ struct __fn { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { return ranges::__remove_if_impl(std::move(__first), std::move(__last), __pred, __proj); } @@ -65,7 +65,7 @@ struct __fn { class _Proj = identity, indirect_unary_predicate, _Proj>> _Pred> requires permutable> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { return ranges::__remove_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } diff --git a/libcxx/include/__algorithm/ranges_search.h b/libcxx/include/__algorithm/ranges_search.h index 24bbe28ead4d1..388d5afa499d6 100644 --- a/libcxx/include/__algorithm/ranges_search.h +++ b/libcxx/include/__algorithm/ranges_search.h @@ -75,7 +75,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter1> operator()(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred __pred = {}, @@ -90,7 +90,7 @@ struct __fn { class _Proj1 = identity, class _Proj2 = identity> requires 
indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range1> operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, diff --git a/libcxx/include/__algorithm/ranges_search_n.h b/libcxx/include/__algorithm/ranges_search_n.h index d2846f6c5c874..f44afde03e99a 100644 --- a/libcxx/include/__algorithm/ranges_search_n.h +++ b/libcxx/include/__algorithm/ranges_search_n.h @@ -76,7 +76,7 @@ struct __fn { class _Pred = ranges::equal_to, class _Proj = identity> requires indirectly_comparable<_Iter, const _Type*, _Pred, _Proj> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, iter_difference_t<_Iter> __count, const _Type& __value, @@ -87,7 +87,7 @@ struct __fn { template requires indirectly_comparable, const _Type*, _Pred, _Proj> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, range_difference_t<_Range> __count, const _Type& __value, diff --git a/libcxx/include/__algorithm/ranges_unique.h b/libcxx/include/__algorithm/ranges_unique.h index 45e54276ed55a..be427ccf7fad8 100644 --- a/libcxx/include/__algorithm/ranges_unique.h +++ b/libcxx/include/__algorithm/ranges_unique.h @@ -45,7 +45,7 @@ namespace __unique { sentinel_for<_Iter> _Sent, class _Proj = identity, indirect_equivalence_relation> _Comp = ranges::equal_to> - _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> operator()(_Iter __first, _Sent __last, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__unique<_RangeAlgPolicy>( std::move(__first), std::move(__last), std::__make_projected(__comp, __proj)); @@ -57,7 +57,7 @@ namespace __unique { class _Proj = identity, indirect_equivalence_relation, _Proj>> _Comp = 
ranges::equal_to> requires permutable> - _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_subrange_t<_Range> operator()(_Range&& __range, _Comp __comp = {}, _Proj __proj = {}) const { auto __ret = std::__unique<_RangeAlgPolicy>( ranges::begin(__range), ranges::end(__range), std::__make_projected(__comp, __proj)); diff --git a/libcxx/include/__algorithm/ranges_upper_bound.h b/libcxx/include/__algorithm/ranges_upper_bound.h index ec1addd520a61..a1340809048c6 100644 --- a/libcxx/include/__algorithm/ranges_upper_bound.h +++ b/libcxx/include/__algorithm/ranges_upper_bound.h @@ -34,7 +34,7 @@ namespace __upper_bound { struct __fn { template _Sent, class _Type, class _Proj = identity, indirect_strict_weak_order> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) const { auto __comp_lhs_rhs_swapped = [&](const auto& __lhs, const auto& __rhs) { return !std::invoke(__comp, __rhs, __lhs); @@ -45,7 +45,7 @@ struct __fn { template , _Proj>> _Comp = ranges::less> - _LIBCPP_HIDE_FROM_ABI constexpr + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __r, const _Type& __value, _Comp __comp = {}, diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h index 4f7577eb06055..60c1f8093c61d 100644 --- a/libcxx/include/__format/buffer.h +++ b/libcxx/include/__format/buffer.h @@ -17,6 +17,7 @@ #include <__algorithm/ranges_copy_n.h> #include <__algorithm/transform.h> #include <__algorithm/unwrap_iter.h> +#include <__concepts/same_as.h> #include <__config> #include <__format/concepts.h> #include <__format/enable_insertable.h> @@ -27,7 +28,6 @@ #include <__iterator/iterator_traits.h> #include <__iterator/wrap_iter.h> #include <__utility/move.h> -#include #include #include 
#include diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index 19468de45ca37..b3c0b34427852 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -18,7 +18,6 @@ #include <__iterator/back_insert_iterator.h> #include <__iterator/concepts.h> #include <__utility/move.h> -#include #include #ifndef _LIBCPP_HAS_NO_LOCALIZATION diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h index bd5b6ae2a4e54..05f51f7cf9b94 100644 --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -19,6 +19,7 @@ #include <__algorithm/find_if.h> #include <__algorithm/min.h> #include <__assert> +#include <__concepts/same_as.h> #include <__config> #include <__debug> #include <__format/format_arg.h> @@ -28,7 +29,6 @@ #include <__format/unicode.h> #include <__variant/monostate.h> #include -#include #include #include #include diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h index 44e33a5a3a624..2d9cdc0459d63 100644 --- a/libcxx/include/__functional/function.h +++ b/libcxx/include/__functional/function.h @@ -18,10 +18,10 @@ #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> #include <__memory/builtin_new_allocator.h> #include <__memory/compressed_pair.h> -#include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> #include <__utility/forward.h> #include <__utility/move.h> diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index 3f63a86731e0e..87081dd56a05c 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -10,9 +10,10 @@ #ifndef _LIBCPP___FUNCTIONAL_RANGES_OPERATIONS_H #define 
_LIBCPP___FUNCTIONAL_RANGES_OPERATIONS_H +#include <__concepts/equality_comparable.h> +#include <__concepts/totally_ordered.h> #include <__config> #include <__utility/forward.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h index e26ad4b7317cf..cd6353e65e86a 100644 --- a/libcxx/include/__iterator/advance.h +++ b/libcxx/include/__iterator/advance.h @@ -11,6 +11,8 @@ #define _LIBCPP___ITERATOR_ADVANCE_H #include <__assert> +#include <__concepts/assignable.h> +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> @@ -18,7 +20,6 @@ #include <__utility/convert_to_integral.h> #include <__utility/move.h> #include <__utility/unreachable.h> -#include #include #include #include diff --git a/libcxx/include/__iterator/common_iterator.h b/libcxx/include/__iterator/common_iterator.h index 4de57c5d67c08..a1985c9287534 100644 --- a/libcxx/include/__iterator/common_iterator.h +++ b/libcxx/include/__iterator/common_iterator.h @@ -11,6 +11,13 @@ #define _LIBCPP___ITERATOR_COMMON_ITERATOR_H #include <__assert> +#include <__concepts/assignable.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/copyable.h> +#include <__concepts/derived_from.h> +#include <__concepts/equality_comparable.h> +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> @@ -18,7 +25,6 @@ #include <__iterator/iter_swap.h> #include <__iterator/iterator_traits.h> #include <__iterator/readable_traits.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h index bd68889333ce6..246f84c7cf53d 100644 --- a/libcxx/include/__iterator/concepts.h +++ b/libcxx/include/__iterator/concepts.h @@ -10,6 
+10,21 @@ #ifndef _LIBCPP___ITERATOR_CONCEPTS_H #define _LIBCPP___ITERATOR_CONCEPTS_H +#include <__concepts/arithmetic.h> +#include <__concepts/assignable.h> +#include <__concepts/common_reference_with.h> +#include <__concepts/constructible.h> +#include <__concepts/copyable.h> +#include <__concepts/derived_from.h> +#include <__concepts/equality_comparable.h> +#include <__concepts/invocable.h> +#include <__concepts/movable.h> +#include <__concepts/predicate.h> +#include <__concepts/regular.h> +#include <__concepts/relation.h> +#include <__concepts/same_as.h> +#include <__concepts/semiregular.h> +#include <__concepts/totally_ordered.h> #include <__config> #include <__iterator/incrementable_traits.h> #include <__iterator/iter_move.h> @@ -17,7 +32,6 @@ #include <__iterator/readable_traits.h> #include <__memory/pointer_traits.h> #include <__utility/forward.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/counted_iterator.h b/libcxx/include/__iterator/counted_iterator.h index aab2c51f33a84..8303013ef4e12 100644 --- a/libcxx/include/__iterator/counted_iterator.h +++ b/libcxx/include/__iterator/counted_iterator.h @@ -10,6 +10,11 @@ #define _LIBCPP___ITERATOR_COUNTED_ITERATOR_H #include <__assert> +#include <__concepts/assignable.h> +#include <__concepts/common_with.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/default_sentinel.h> @@ -21,7 +26,6 @@ #include <__memory/pointer_traits.h> #include <__utility/move.h> #include -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/incrementable_traits.h b/libcxx/include/__iterator/incrementable_traits.h index e6a5ed720f9fa..6f966ec4c7484 100644 --- a/libcxx/include/__iterator/incrementable_traits.h +++ b/libcxx/include/__iterator/incrementable_traits.h @@ -10,9 +10,9 @@ 
#ifndef _LIBCPP___ITERATOR_INCREMENTABLE_TRAITS_H #define _LIBCPP___ITERATOR_INCREMENTABLE_TRAITS_H +#include <__concepts/arithmetic.h> #include <__config> #include <__type_traits/is_primary_template.h> -#include #include #include diff --git a/libcxx/include/__iterator/iter_swap.h b/libcxx/include/__iterator/iter_swap.h index 9e06464c36904..40272e2b0ad5f 100644 --- a/libcxx/include/__iterator/iter_swap.h +++ b/libcxx/include/__iterator/iter_swap.h @@ -9,6 +9,8 @@ #ifndef _LIBCPP___ITERATOR_ITER_SWAP_H #define _LIBCPP___ITERATOR_ITER_SWAP_H +#include <__concepts/class_or_enum.h> +#include <__concepts/swappable.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/iter_move.h> @@ -16,7 +18,6 @@ #include <__iterator/readable_traits.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__iterator/iterator_traits.h b/libcxx/include/__iterator/iterator_traits.h index 918c7138ec187..b4cf07233296d 100644 --- a/libcxx/include/__iterator/iterator_traits.h +++ b/libcxx/include/__iterator/iterator_traits.h @@ -10,11 +10,17 @@ #ifndef _LIBCPP___ITERATOR_ITERATOR_TRAITS_H #define _LIBCPP___ITERATOR_ITERATOR_TRAITS_H +#include <__concepts/arithmetic.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> +#include <__concepts/copyable.h> +#include <__concepts/equality_comparable.h> +#include <__concepts/same_as.h> +#include <__concepts/totally_ordered.h> #include <__config> #include <__fwd/pair.h> #include <__iterator/incrementable_traits.h> #include <__iterator/readable_traits.h> -#include #include #include diff --git a/libcxx/include/__iterator/readable_traits.h b/libcxx/include/__iterator/readable_traits.h index 500b46ac145f6..dc818d8a230e7 100644 --- a/libcxx/include/__iterator/readable_traits.h +++ b/libcxx/include/__iterator/readable_traits.h @@ -10,8 +10,8 @@ #ifndef _LIBCPP___ITERATOR_READABLE_TRAITS_H #define 
_LIBCPP___ITERATOR_READABLE_TRAITS_H +#include <__concepts/same_as.h> #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 65160f3562fbc..242de1ad1a716 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -12,16 +12,18 @@ #include <__availability> #include <__config> -#include <__memory/shared_ptr.h> #include #include #include #include #include +// Some platforms require more includes than others. Keep the includes on all platforms for now. +#include +#include + #if defined(_LIBCPP_MSVCRT_LIKE) # include <__support/win32/locale_win32.h> -# include #elif defined(_AIX) || defined(__MVS__) # include <__support/ibm/xlocale.h> #elif defined(__ANDROID__) diff --git a/libcxx/include/__memory/allocator_destructor.h b/libcxx/include/__memory/allocator_destructor.h new file mode 100644 index 0000000000000..623ad8ad800a1 --- /dev/null +++ b/libcxx/include/__memory/allocator_destructor.h @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MEMORY_ALLOCATOR_DESTRUCTOR_H +#define _LIBCPP___MEMORY_ALLOCATOR_DESTRUCTOR_H + +#include <__config> +#include <__memory/allocator_traits.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +class __allocator_destructor +{ + typedef _LIBCPP_NODEBUG allocator_traits<_Alloc> __alloc_traits; +public: + typedef _LIBCPP_NODEBUG typename __alloc_traits::pointer pointer; + typedef _LIBCPP_NODEBUG typename __alloc_traits::size_type size_type; +private: + _Alloc& __alloc_; + size_type __s_; +public: + _LIBCPP_INLINE_VISIBILITY __allocator_destructor(_Alloc& __a, size_type __s) + _NOEXCEPT + : __alloc_(__a), __s_(__s) {} + _LIBCPP_INLINE_VISIBILITY + void operator()(pointer __p) _NOEXCEPT + {__alloc_traits::deallocate(__alloc_, __p, __s_);} +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___MEMORY_ALLOCATOR_DESTRUCTOR_H diff --git a/libcxx/include/__memory/concepts.h b/libcxx/include/__memory/concepts.h index bc4cff7719e4e..76d2a2e729d61 100644 --- a/libcxx/include/__memory/concepts.h +++ b/libcxx/include/__memory/concepts.h @@ -10,13 +10,13 @@ #ifndef _LIBCPP___MEMORY_CONCEPTS_H #define _LIBCPP___MEMORY_CONCEPTS_H +#include <__concepts/same_as.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__iterator/readable_traits.h> #include <__ranges/access.h> #include <__ranges/concepts.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 238d765f3ebb0..57051dfde3027 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -21,6 +21,7 @@ #include <__memory/addressof.h> #include <__memory/allocation_guard.h> #include 
<__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> #include <__memory/auto_ptr.h> #include <__memory/compressed_pair.h> @@ -42,32 +43,12 @@ # include #endif - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif _LIBCPP_BEGIN_NAMESPACE_STD -template -class __allocator_destructor -{ - typedef _LIBCPP_NODEBUG allocator_traits<_Alloc> __alloc_traits; -public: - typedef _LIBCPP_NODEBUG typename __alloc_traits::pointer pointer; - typedef _LIBCPP_NODEBUG typename __alloc_traits::size_type size_type; -private: - _Alloc& __alloc_; - size_type __s_; -public: - _LIBCPP_INLINE_VISIBILITY __allocator_destructor(_Alloc& __a, size_type __s) - _NOEXCEPT - : __alloc_(__a), __s_(__s) {} - _LIBCPP_INLINE_VISIBILITY - void operator()(pointer __p) _NOEXCEPT - {__alloc_traits::deallocate(__alloc_, __p, __s_);} -}; - // NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively) // should be sufficient for thread safety. 
// See https://llvm.org/PR22803 diff --git a/libcxx/include/__ranges/common_view.h b/libcxx/include/__ranges/common_view.h index ec0c7632d9297..aad0d2f49e78d 100644 --- a/libcxx/include/__ranges/common_view.h +++ b/libcxx/include/__ranges/common_view.h @@ -9,6 +9,8 @@ #ifndef _LIBCPP___RANGES_COMMON_VIEW_H #define _LIBCPP___RANGES_COMMON_VIEW_H +#include <__concepts/constructible.h> +#include <__concepts/copyable.h> #include <__config> #include <__iterator/common_iterator.h> #include <__iterator/iterator_traits.h> @@ -21,7 +23,6 @@ #include <__ranges/view_interface.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/copyable_box.h b/libcxx/include/__ranges/copyable_box.h index 9b708d942c20f..fb3d6e409c8f2 100644 --- a/libcxx/include/__ranges/copyable_box.h +++ b/libcxx/include/__ranges/copyable_box.h @@ -10,11 +10,13 @@ #ifndef _LIBCPP___RANGES_COPYABLE_BOX_H #define _LIBCPP___RANGES_COPYABLE_BOX_H +#include <__concepts/constructible.h> +#include <__concepts/copyable.h> +#include <__concepts/movable.h> #include <__config> #include <__memory/addressof.h> #include <__memory/construct_at.h> #include <__utility/move.h> -#include #include #include diff --git a/libcxx/include/__ranges/drop_view.h b/libcxx/include/__ranges/drop_view.h index 9e0fe16a55a40..3131f4b43d41d 100644 --- a/libcxx/include/__ranges/drop_view.h +++ b/libcxx/include/__ranges/drop_view.h @@ -11,6 +11,8 @@ #include <__algorithm/min.h> #include <__assert> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> #include <__config> #include <__functional/bind_back.h> #include <__fwd/span.h> @@ -33,7 +35,6 @@ #include <__utility/auto_cast.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/enable_view.h b/libcxx/include/__ranges/enable_view.h index 
a1e5721404cdd..2dc4752ff428f 100644 --- a/libcxx/include/__ranges/enable_view.h +++ b/libcxx/include/__ranges/enable_view.h @@ -10,8 +10,9 @@ #ifndef _LIBCPP___RANGES_ENABLE_VIEW_H #define _LIBCPP___RANGES_ENABLE_VIEW_H +#include <__concepts/derived_from.h> +#include <__concepts/same_as.h> #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/filter_view.h b/libcxx/include/__ranges/filter_view.h index 3e50c17c64050..74c07d9d36691 100644 --- a/libcxx/include/__ranges/filter_view.h +++ b/libcxx/include/__ranges/filter_view.h @@ -10,6 +10,10 @@ #define _LIBCPP___RANGES_FILTER_VIEW_H #include <__algorithm/ranges_find_if.h> +#include <__concepts/constructible.h> +#include <__concepts/copyable.h> +#include <__concepts/derived_from.h> +#include <__concepts/equality_comparable.h> #include <__config> #include <__debug> #include <__functional/bind_back.h> @@ -30,7 +34,6 @@ #include <__utility/forward.h> #include <__utility/in_place.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/non_propagating_cache.h b/libcxx/include/__ranges/non_propagating_cache.h index b565af69e609d..76ca73dd03742 100644 --- a/libcxx/include/__ranges/non_propagating_cache.h +++ b/libcxx/include/__ranges/non_propagating_cache.h @@ -14,7 +14,6 @@ #include <__iterator/iterator_traits.h> // iter_reference_t #include <__memory/addressof.h> #include <__utility/forward.h> -#include // constructible_from #include #include diff --git a/libcxx/include/__ranges/range_adaptor.h b/libcxx/include/__ranges/range_adaptor.h index c287a193a57db..37e48179e378b 100644 --- a/libcxx/include/__ranges/range_adaptor.h +++ b/libcxx/include/__ranges/range_adaptor.h @@ -10,13 +10,16 @@ #ifndef _LIBCPP___RANGES_RANGE_ADAPTOR_H #define _LIBCPP___RANGES_RANGE_ADAPTOR_H +#include <__concepts/constructible.h> +#include <__concepts/derived_from.h> +#include 
<__concepts/invocable.h> +#include <__concepts/same_as.h> #include <__config> #include <__functional/compose.h> #include <__functional/invoke.h> #include <__ranges/concepts.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/ref_view.h b/libcxx/include/__ranges/ref_view.h index 7d1ae74ca2a50..bf94889f62bf7 100644 --- a/libcxx/include/__ranges/ref_view.h +++ b/libcxx/include/__ranges/ref_view.h @@ -9,6 +9,8 @@ #ifndef _LIBCPP___RANGES_REF_VIEW_H #define _LIBCPP___RANGES_REF_VIEW_H +#include <__concepts/convertible_to.h> +#include <__concepts/different_from.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> @@ -22,7 +24,6 @@ #include <__ranges/size.h> #include <__ranges/view_interface.h> #include <__utility/forward.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/single_view.h b/libcxx/include/__ranges/single_view.h index 38726c225e57b..e15a0c3199597 100644 --- a/libcxx/include/__ranges/single_view.h +++ b/libcxx/include/__ranges/single_view.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___RANGES_SINGLE_VIEW_H #define _LIBCPP___RANGES_SINGLE_VIEW_H +#include <__concepts/constructible.h> #include <__config> #include <__ranges/copyable_box.h> #include <__ranges/range_adaptor.h> @@ -16,7 +17,6 @@ #include <__utility/forward.h> #include <__utility/in_place.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h index 0e6d295651a3c..e4c82222f56ee 100644 --- a/libcxx/include/__ranges/size.h +++ b/libcxx/include/__ranges/size.h @@ -9,13 +9,13 @@ #ifndef _LIBCPP___RANGES_SIZE_H #define _LIBCPP___RANGES_SIZE_H +#include <__concepts/arithmetic.h> #include <__concepts/class_or_enum.h> #include <__config> #include <__iterator/concepts.h> #include 
<__iterator/iterator_traits.h> #include <__ranges/access.h> #include <__utility/auto_cast.h> -#include #include #include diff --git a/libcxx/include/__ranges/take_view.h b/libcxx/include/__ranges/take_view.h index 3fb9499c0118a..2c98a0bed690b 100644 --- a/libcxx/include/__ranges/take_view.h +++ b/libcxx/include/__ranges/take_view.h @@ -11,6 +11,8 @@ #include <__algorithm/min.h> #include <__algorithm/ranges_min.h> +#include <__concepts/constructible.h> +#include <__concepts/convertible_to.h> #include <__config> #include <__functional/bind_back.h> #include <__fwd/span.h> @@ -34,7 +36,6 @@ #include <__utility/auto_cast.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer index 1754baacd76ab..823487318ee9b 100644 --- a/libcxx/include/__split_buffer +++ b/libcxx/include/__split_buffer @@ -22,7 +22,6 @@ #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> #include <__memory/pointer_traits.h> -#include <__memory/shared_ptr.h> #include <__memory/swap_allocator.h> #include <__utility/forward.h> #include <__utility/move.h> diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm index 9b21e7bbf1936..e27872285f15e 100644 --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -1917,6 +1917,7 @@ template #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include # include diff --git a/libcxx/include/any b/libcxx/include/any index dc26a20e6ae22..ec5171ff714e0 100644 --- a/libcxx/include/any +++ b/libcxx/include/any @@ -84,8 +84,8 @@ namespace std { #include <__availability> #include <__config> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> -#include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> #include <__utility/forward.h> #include 
<__utility/in_place.h> @@ -699,6 +699,9 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include # include # include # include diff --git a/libcxx/include/array b/libcxx/include/array index af199ca2e7199..cb1a6d1202582 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -534,6 +534,7 @@ _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include #endif diff --git a/libcxx/include/bitset b/libcxx/include/bitset index 72b678b5ffdda..c260f9f592c01 100644 --- a/libcxx/include/bitset +++ b/libcxx/include/bitset @@ -1152,4 +1152,8 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_BITSET diff --git a/libcxx/include/charconv b/libcxx/include/charconv index b8664a456b6b9..d2031eac8ba9b 100644 --- a/libcxx/include/charconv +++ b/libcxx/include/charconv @@ -832,6 +832,7 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/chrono b/libcxx/include/chrono index f34919040950b..05e4b6d126ebf 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -728,4 +728,8 @@ constexpr chrono::year operator ""y(unsigned lo # pragma GCC system_header #endif +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_CHRONO diff --git a/libcxx/include/codecvt b/libcxx/include/codecvt index a724477b15548..ce378c8d1c6b2 100644 --- a/libcxx/include/codecvt +++ b/libcxx/include/codecvt @@ -553,4 +553,19 @@ _LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_END_NAMESPACE_STD +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# include +# include +# include +# include +# include +# 
include +# include +# include +# include +#endif + #endif // _LIBCPP_CODECVT diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable index 038f0ba2257d7..6b4d506939e16 100644 --- a/libcxx/include/condition_variable +++ b/libcxx/include/condition_variable @@ -268,4 +268,8 @@ _LIBCPP_END_NAMESPACE_STD #endif // !_LIBCPP_HAS_NO_THREADS +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_CONDITION_VARIABLE diff --git a/libcxx/include/deque b/libcxx/include/deque index b6e092b396370..989b64d7a109e 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -176,6 +176,7 @@ template #include <__iterator/next.h> #include <__iterator/prev.h> #include <__iterator/reverse_iterator.h> +#include <__memory/allocator_destructor.h> #include <__memory/pointer_traits.h> #include <__memory/temp_value.h> #include <__memory/unique_ptr.h> @@ -2943,8 +2944,12 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include +# include # include +# include # include +# include #endif #endif // _LIBCPP_DEQUE diff --git a/libcxx/include/ext/hash_map b/libcxx/include/ext/hash_map index 48186aed0b6a6..3ad4b166865af 100644 --- a/libcxx/include/ext/hash_map +++ b/libcxx/include/ext/hash_map @@ -983,6 +983,7 @@ operator!=(const hash_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>& __x, } // namespace __gnu_cxx #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/ext/hash_set b/libcxx/include/ext/hash_set index 68b449d6b867a..dc8b786453842 100644 --- a/libcxx/include/ext/hash_set +++ b/libcxx/include/ext/hash_set @@ -663,6 +663,7 @@ operator!=(const hash_multiset<_Value, _Hash, _Pred, _Alloc>& __x, } // namespace __gnu_cxx #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/filesystem 
b/libcxx/include/filesystem index 1e7efd248aa18..98d4bb9261bcc 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -461,4 +461,8 @@ inline constexpr bool std::ranges::enable_view +#endif + #endif // _LIBCPP_FILESYSTEM diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index 1f817ae8927fd..51643e9e2828c 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -190,10 +190,10 @@ template #include <__iterator/next.h> #include <__memory/addressof.h> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> #include <__memory/pointer_traits.h> -#include <__memory/shared_ptr.h> #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> #include <__memory_resource/polymorphic_allocator.h> @@ -1792,8 +1792,12 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include +# include # include +# include # include +# include #endif #endif // _LIBCPP_FORWARD_LIST diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 95f345fae4d78..6b6e5b603c2e7 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -1744,4 +1744,13 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# include +# include +# include +#endif + #endif // _LIBCPP_FSTREAM diff --git a/libcxx/include/functional b/libcxx/include/functional index 4c4e02dd702bc..8589d3a9d6a9c 100644 --- a/libcxx/include/functional +++ b/libcxx/include/functional @@ -531,7 +531,6 @@ POLICY: For non-variadic implementations, the number of arguments is limited #include <__functional/unary_negate.h> #include <__functional/unwrap_ref.h> #include <__utility/forward.h> -#include #include #include // TODO: find out why removing this breaks the modules build #include @@ -543,6 +542,7 @@ POLICY: 
For non-variadic implementations, the number of arguments is limited #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/future b/libcxx/include/future index 5371de4fc43ae..e317e8d636ef7 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -367,6 +367,7 @@ template struct uses_allocator, Alloc>; #include <__chrono/time_point.h> #include <__config> #include <__memory/allocator_arg_t.h> +#include <__memory/allocator_destructor.h> #include <__memory/uses_allocator.h> #include <__utility/auto_cast.h> #include <__utility/forward.h> diff --git a/libcxx/include/ios b/libcxx/include/ios index 6e8360f4ab565..e67b7d2b779a3 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -1038,4 +1038,17 @@ defaultfloat(ios_base& __str) _LIBCPP_END_NAMESPACE_STD +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +#endif + #endif // _LIBCPP_IOS diff --git a/libcxx/include/istream b/libcxx/include/istream index 403b29c29e9e3..1c9adcc0c6297 100644 --- a/libcxx/include/istream +++ b/libcxx/include/istream @@ -1637,6 +1637,10 @@ extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_iostream; _LIBCPP_END_NAMESPACE_STD +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + _LIBCPP_POP_MACROS #endif // _LIBCPP_ISTREAM diff --git a/libcxx/include/list b/libcxx/include/list index d3c47bb1d6155..4d68c5e72668b 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -196,10 +196,10 @@ template #include <__iterator/reverse_iterator.h> #include <__memory/addressof.h> #include <__memory/allocator.h> +#include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> #include <__memory/pointer_traits.h> -#include <__memory/shared_ptr.h> #include 
<__memory/swap_allocator.h> #include <__memory/unique_ptr.h> #include <__memory_resource/polymorphic_allocator.h> @@ -2375,8 +2375,12 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include +# include # include +# include # include +# include #endif #endif // _LIBCPP_LIST diff --git a/libcxx/include/locale b/libcxx/include/locale index c9ec7c36f5820..8a330ae374d0f 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -4353,8 +4353,12 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include # include # include +# include +# include #endif #endif // _LIBCPP_LOCALE diff --git a/libcxx/include/map b/libcxx/include/map index 2d55b69dc9267..9cf47b245a02a 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -2352,6 +2352,7 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include # include diff --git a/libcxx/include/memory b/libcxx/include/memory index 8694cf6994a7c..48e808ef54cb1 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -900,6 +900,7 @@ template #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include # include diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index 79c7c3d354c8d..120cf1916e8cd 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -1031,6 +1031,7 @@ module std [system] { module allocation_guard { private header "__memory/allocation_guard.h" } module allocator { private header "__memory/allocator.h" } module allocator_arg_t { private header "__memory/allocator_arg_t.h" } + module allocator_destructor { private header "__memory/allocator_destructor.h" } module allocator_traits { private header "__memory/allocator_traits.h" } module assume_aligned { private 
header "__memory/assume_aligned.h" } module auto_ptr { private header "__memory/auto_ptr.h" } diff --git a/libcxx/include/mutex b/libcxx/include/mutex index d0b53ba75d834..d11ffb20eff93 100644 --- a/libcxx/include/mutex +++ b/libcxx/include/mutex @@ -704,6 +704,7 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/numbers b/libcxx/include/numbers index 3c8527dfc2de1..1d9b6b0e5fb35 100644 --- a/libcxx/include/numbers +++ b/libcxx/include/numbers @@ -59,8 +59,8 @@ namespace std::numbers { */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__concepts/arithmetic.h> #include <__config> -#include #include #include @@ -131,4 +131,8 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_STD_VER > 17 +#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +#endif + #endif // _LIBCPP_NUMBERS diff --git a/libcxx/include/numeric b/libcxx/include/numeric index 64cd45b430828..2fb6f9ed01929 100644 --- a/libcxx/include/numeric +++ b/libcxx/include/numeric @@ -172,6 +172,7 @@ template #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/ostream b/libcxx/include/ostream index 77ec87b35e4fe..1d943d8a8abd2 100644 --- a/libcxx/include/ostream +++ b/libcxx/include/ostream @@ -165,6 +165,7 @@ basic_ostream& operator<<(basic_ostream&, cons #include <__assert> // all public C++ headers provide the assertion handler #include <__config> +#include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> #include #include @@ -1188,6 +1189,7 @@ extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_ostream; _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/queue b/libcxx/include/queue index 
d23f2385eda34..c58da5ec6ee3c 100644 --- a/libcxx/include/queue +++ b/libcxx/include/queue @@ -959,6 +959,7 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator # include #endif diff --git a/libcxx/include/random b/libcxx/include/random index ecab84a573550..f8077f10c17f3 100644 --- a/libcxx/include/random +++ b/libcxx/include/random @@ -1730,6 +1730,7 @@ class piecewise_linear_distribution #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include # include # include # include diff --git a/libcxx/include/regex b/libcxx/include/regex index 91a10feef6b5d..3c3a2e4a79486 100644 --- a/libcxx/include/regex +++ b/libcxx/include/regex @@ -773,6 +773,7 @@ typedef regex_token_iterator wsregex_token_iterator; #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/swap.h> +#include #include #include #include @@ -6858,7 +6859,12 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include +# include # include +# include +# include # include #endif diff --git a/libcxx/include/set b/libcxx/include/set index 3933ef22c221e..1154c4e9509c7 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -1597,6 +1597,7 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/span b/libcxx/include/span index 337a67434ec4c..8afd1942c1f31 100644 --- a/libcxx/include/span +++ b/libcxx/include/span @@ -590,6 +590,7 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/stack b/libcxx/include/stack index 7b08d57da152b..2abbcd025c4aa 100644 --- a/libcxx/include/stack +++ b/libcxx/include/stack @@ -362,6 +362,7 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator, _Alloc> _LIBCPP_END_NAMESPACE_STD #if 
!defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include #endif diff --git a/libcxx/include/string b/libcxx/include/string index 8eb1d30970421..6aee5a489a754 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -109,6 +109,10 @@ public: const allocator_type& a = allocator_type()); // constexpr since C++20 basic_string(const basic_string& str, size_type pos, size_type n, const Allocator& a = Allocator()); // constexpr since C++20 + constexpr basic_string( + basic_string&& str, size_type pos, const Allocator& a = Allocator()); // since C++23 + constexpr basic_string( + basic_string&& str, size_type pos, size_type n, const Allocator& a = Allocator()); // since C++23 template basic_string(const T& t, size_type pos, size_type n, const Allocator& a = Allocator()); // C++17, constexpr since C++20 template @@ -261,8 +265,9 @@ public: basic_string& replace(const_iterator i1, const_iterator i2, initializer_list); // constexpr since C++20 size_type copy(value_type* s, size_type n, size_type pos = 0) const; // constexpr since C++20 - basic_string substr(size_type pos = 0, size_type n = npos) const; // constexpr since C++20 - + basic_string substr(size_type pos = 0, size_type n = npos) const; // constexpr in C++20, removed in C++23 + basic_string substr(size_type pos = 0, size_type n = npos) const&; // since C++23 + constexpr basic_string substr(size_type pos = 0, size_type n = npos) &&; // since C++23 void swap(basic_string& str) noexcept(allocator_traits::propagate_on_container_swap::value || allocator_traits::is_always_equal::value); // C++17, constexpr since C++20 @@ -897,6 +902,36 @@ public: std::__debug_db_insert_c(this); } +#if _LIBCPP_STD_VER > 20 + _LIBCPP_HIDE_FROM_ABI constexpr + basic_string(basic_string&& __str, size_type __pos, const _Allocator& __alloc = _Allocator()) + : basic_string(std::move(__str), __pos, npos, __alloc) {} + + _LIBCPP_HIDE_FROM_ABI constexpr + basic_string(basic_string&& __str, size_type __pos, 
size_type __n, const _Allocator& __alloc = _Allocator()) + : __r_(__default_init_tag(), __alloc) { + if (__pos > __str.size()) + __throw_out_of_range(); + + auto __len = std::min(__n, __str.size() - __pos); + if (__alloc_traits::is_always_equal::value || __alloc == __str.__alloc()) { + __r_.first() = __str.__r_.first(); + __str.__default_init(); + + _Traits::move(data(), data() + __pos, __len); + __set_size(__len); + _Traits::assign(data()[__len], value_type()); + } else { + // Perform a copy because the allocators are not compatible. + __init(__str.data() + __pos, __len); + } + + std::__debug_db_insert_c(this); + if (__is_long()) + std::__debug_db_swap(this, &__str); + } +#endif + template ::value, nullptr_t> > _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string(size_type __n, _CharT __c, const _Allocator& __a); @@ -975,9 +1010,12 @@ public: } #ifndef _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& operator=(basic_string&& __str) - _NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value)); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& operator=(basic_string&& __str) + _NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value)) { + __move_assign(__str, integral_constant()); + return *this; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& operator=(initializer_list __il) {return assign(__il.begin(), __il.size());} #endif @@ -1030,7 +1068,17 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type size() const _NOEXCEPT {return __is_long() ? 
__get_long_size() : __get_short_size();} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type length() const _NOEXCEPT {return size();} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type max_size() const _NOEXCEPT; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type max_size() const _NOEXCEPT { + size_type __m = __alloc_traits::max_size(__alloc()); + if (__m <= std::numeric_limits::max() / 2) { + return __m - __alignment; + } else { + bool __uses_lsb = __endian_factor == 2; + return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment; + } + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type capacity() const _NOEXCEPT { return (__is_long() ? __get_long_cap() : static_cast(__min_cap)) - 1; } @@ -1058,9 +1106,15 @@ public: _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool empty() const _NOEXCEPT {return size() == 0;} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - const_reference operator[](size_type __pos) const _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator[](size_type __pos) _NOEXCEPT; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference operator[](size_type __pos) const _NOEXCEPT { + _LIBCPP_ASSERT(__pos <= size(), "string index out of bounds"); + return *(data() + __pos); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference operator[](size_type __pos) _NOEXCEPT { + _LIBCPP_ASSERT(__pos <= size(), "string index out of bounds"); + return *(__get_pointer() + __pos); + } _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference at(size_type __n) const; _LIBCPP_CONSTEXPR_SINCE_CXX20 reference at(size_type __n); @@ -1095,8 +1149,9 @@ public: basic_string& operator+=(initializer_list __il) { return append(__il); } #endif // _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& append(const basic_string& __str); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& 
append(const basic_string& __str) { + return append(__str.data(), __str.size()); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1154,10 +1209,26 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 void push_back(value_type __c); _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void pop_back(); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference front() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference front() const _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference back() _NOEXCEPT; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference back() const _NOEXCEPT; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference front() _NOEXCEPT { + _LIBCPP_ASSERT(!empty(), "string::front(): string is empty"); + return *__get_pointer(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference front() const _NOEXCEPT { + _LIBCPP_ASSERT(!empty(), "string::front(): string is empty"); + return *data(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 reference back() _NOEXCEPT { + _LIBCPP_ASSERT(!empty(), "string::back(): string is empty"); + return *(__get_pointer() + size() - 1); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const_reference back() const _NOEXCEPT { + _LIBCPP_ASSERT(!empty(), "string::back(): string is empty"); + return *(data() + size() - 1); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1209,8 +1280,10 @@ public: basic_string& assign(initializer_list __il) {return assign(__il.begin(), __il.size());} #endif // _LIBCPP_CXX03_LANG - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& insert(size_type __pos1, const basic_string& __str); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + insert(size_type __pos1, const basic_string& __str) { + return insert(__pos1, __str.data(), __str.size()); + } template 
_LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1236,8 +1309,16 @@ public: _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos, const value_type* __s); _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& insert(size_type __pos, size_type __n, value_type __c); _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator insert(const_iterator __pos, value_type __c); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - iterator insert(const_iterator __pos, size_type __n, value_type __c); + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator + insert(const_iterator __pos, size_type __n, value_type __c) { + _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this, + "string::insert(iterator, n, value) called with an iterator not referring to this string"); + difference_type __p = __pos - begin(); + insert(static_cast(__p), __n, __c); + return begin() + __p; + } + template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 __enable_if_t @@ -1266,8 +1347,10 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 iterator erase(const_iterator __first, const_iterator __last); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(size_type __pos1, size_type __n1, const basic_string& __str); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(size_type __pos1, size_type __n1, const basic_string& __str) { + return replace(__pos1, __n1, __str.data(), __str.size()); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1291,8 +1374,12 @@ public: basic_string& replace(size_type __pos, size_type __n1, const value_type* __s, size_type __n2); _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& replace(size_type __pos, size_type __n1, const value_type* __s); _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& replace(size_type __pos, size_type __n1, size_type __n2, value_type __c); - _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(const_iterator __i1, const_iterator __i2, const basic_string& __str); + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(const_iterator __i1, const_iterator __i2, const basic_string& __str) { + return replace( + static_cast(__i1 - begin()), static_cast(__i2 - __i1), __str.data(), __str.size()); + } template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 @@ -1303,12 +1390,21 @@ public: > replace(const_iterator __i1, const_iterator __i2, const _Tp& __t) { __self_view __sv = __t; return replace(__i1 - begin(), __i2 - __i1, __sv); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(const_iterator __i1, const_iterator __i2, const value_type* __s); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string& replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n) { + return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __s, __n); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(const_iterator __i1, const_iterator __i2, const value_type* __s) { + return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __s); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& + replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c) { + return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __n, __c); + } + template _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS _LIBCPP_CONSTEXPR_SINCE_CXX20 __enable_if_t @@ -1324,8 +1420,24 @@ public: #endif // _LIBCPP_CXX03_LANG 
_LIBCPP_CONSTEXPR_SINCE_CXX20 size_type copy(value_type* __s, size_type __n, size_type __pos = 0) const; + + // TODO: Maybe don't pass in the allocator. See https://llvm.org/PR57190 +#if _LIBCPP_STD_VER <= 20 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - basic_string substr(size_type __pos = 0, size_type __n = npos) const; + basic_string substr(size_type __pos = 0, size_type __n = npos) const { + return basic_string(*this, __pos, __n, __alloc()); + } +#else + _LIBCPP_HIDE_FROM_ABI constexpr + basic_string substr(size_type __pos = 0, size_type __n = npos) const& { + return basic_string(*this, __pos, __n, __alloc()); + } + + _LIBCPP_HIDE_FROM_ABI constexpr + basic_string substr(size_type __pos = 0, size_type __n = npos) && { + return basic_string(std::move(*this), __pos, __n, __alloc()); + } +#endif _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(basic_string& __str) @@ -1749,8 +1861,9 @@ private: template _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string& __assign_no_alias(const value_type* __s, size_type __n); - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - void __erase_to_end(size_type __pos); + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __erase_to_end(size_type __pos) { + __null_terminate_at(std::__to_address(__get_pointer()), __pos); + } // __erase_external_with_move is invoked for erase() invocations where // `n ~= npos`, likely requiring memory moves on the string data. 
@@ -2460,17 +2573,6 @@ basic_string<_CharT, _Traits, _Allocator>::__move_assign(basic_string& __str, tr } } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::operator=(basic_string&& __str) - _NOEXCEPT_((__noexcept_move_assign_container<_Allocator, __alloc_traits>::value)) -{ - __move_assign(__str, integral_constant()); - return *this; -} - #endif template @@ -2711,14 +2813,6 @@ basic_string<_CharT, _Traits, _Allocator>::append( return *this; } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str) -{ - return append(__str.data(), __str.size()); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<_CharT, _Traits, _Allocator>& @@ -2876,14 +2970,6 @@ basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, _Forward } } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_string& __str) -{ - return insert(__pos1, __str.data(), __str.size()); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<_CharT, _Traits, _Allocator>& @@ -2954,19 +3040,6 @@ basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, value_ty return begin() + static_cast(__ip); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::iterator -basic_string<_CharT, _Traits, _Allocator>::insert(const_iterator __pos, size_type __n, value_type __c) -{ - _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__pos) == this, - "string::insert(iterator, n, value) called with an iterator not" - " referring to this string"); - difference_type __p = __pos - begin(); - insert(static_cast(__p), __n, __c); - return begin() + __p; -} - // replace template @@ -3068,14 +3141,6 @@ basic_string<_CharT, _Traits, 
_Allocator>::replace(const_iterator __i1, const_it return replace(__i1, __i2, __temp); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type __n1, const basic_string& __str) -{ - return replace(__pos1, __n1, __str.data(), __str.size()); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 basic_string<_CharT, _Traits, _Allocator>& @@ -3115,39 +3180,6 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __ return replace(__pos, __n1, __s, traits_type::length(__s)); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const basic_string& __str) -{ - return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), - __str.data(), __str.size()); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const value_type* __s, size_type __n) -{ - return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __s, __n); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, const value_type* __s) -{ - return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __s); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator>& -basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_iterator __i2, size_type __n, value_type __c) -{ - return replace(static_cast(__i1 - begin()), static_cast(__i2 - __i1), __n, __c); -} - // erase // 'externally instantiated' erase() implementation, called when __n != npos. 
@@ -3244,14 +3276,6 @@ basic_string<_CharT, _Traits, _Allocator>::clear() _NOEXCEPT } } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -void -basic_string<_CharT, _Traits, _Allocator>::__erase_to_end(size_type __pos) -{ - __null_terminate_at(std::__to_address(__get_pointer()), __pos); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 void @@ -3275,20 +3299,6 @@ basic_string<_CharT, _Traits, _Allocator>::__resize_default_init(size_type __n) __erase_to_end(__n); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::size_type -basic_string<_CharT, _Traits, _Allocator>::max_size() const _NOEXCEPT -{ - size_type __m = __alloc_traits::max_size(__alloc()); - if (__m <= std::numeric_limits::max() / 2) { - return __m - __alignment; - } else { - bool __uses_lsb = __endian_factor == 2; - return __uses_lsb ? __m - __alignment : (__m / 2) - __alignment; - } -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 void @@ -3385,24 +3395,6 @@ basic_string<_CharT, _Traits, _Allocator>::__shrink_or_extend(size_type __target std::__debug_db_invalidate_all(this); } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::const_reference -basic_string<_CharT, _Traits, _Allocator>::operator[](size_type __pos) const _NOEXCEPT -{ - _LIBCPP_ASSERT(__pos <= size(), "string index out of bounds"); - return *(data() + __pos); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::reference -basic_string<_CharT, _Traits, _Allocator>::operator[](size_type __pos) _NOEXCEPT -{ - _LIBCPP_ASSERT(__pos <= size(), "string index out of bounds"); - return *(__get_pointer() + __pos); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 typename basic_string<_CharT, _Traits, _Allocator>::const_reference @@ -3423,42 +3415,6 @@ basic_string<_CharT, _Traits, _Allocator>::at(size_type __n) return (*this)[__n]; } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, 
_Traits, _Allocator>::reference -basic_string<_CharT, _Traits, _Allocator>::front() _NOEXCEPT -{ - _LIBCPP_ASSERT(!empty(), "string::front(): string is empty"); - return *__get_pointer(); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::const_reference -basic_string<_CharT, _Traits, _Allocator>::front() const _NOEXCEPT -{ - _LIBCPP_ASSERT(!empty(), "string::front(): string is empty"); - return *data(); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::reference -basic_string<_CharT, _Traits, _Allocator>::back() _NOEXCEPT -{ - _LIBCPP_ASSERT(!empty(), "string::back(): string is empty"); - return *(__get_pointer() + size() - 1); -} - -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -typename basic_string<_CharT, _Traits, _Allocator>::const_reference -basic_string<_CharT, _Traits, _Allocator>::back() const _NOEXCEPT -{ - _LIBCPP_ASSERT(!empty(), "string::back(): string is empty"); - return *(data() + size() - 1); -} - template _LIBCPP_CONSTEXPR_SINCE_CXX20 typename basic_string<_CharT, _Traits, _Allocator>::size_type @@ -3472,14 +3428,6 @@ basic_string<_CharT, _Traits, _Allocator>::copy(value_type* __s, size_type __n, return __rlen; } -template -inline _LIBCPP_CONSTEXPR_SINCE_CXX20 -basic_string<_CharT, _Traits, _Allocator> -basic_string<_CharT, _Traits, _Allocator>::substr(size_type __pos, size_type __n) const -{ - return basic_string(*this, __pos, __n, __alloc()); -} - template inline _LIBCPP_CONSTEXPR_SINCE_CXX20 void @@ -4665,6 +4613,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include # include # include diff --git a/libcxx/include/string_view b/libcxx/include/string_view index 2c4f306458c48..8f39cdcc23f62 100644 --- a/libcxx/include/string_view +++ b/libcxx/include/string_view @@ -1023,6 +1023,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && 
_LIBCPP_STD_VER <= 20 # include +# include # include # include #endif diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map index a727c36223e5e..cd0aea1205d52 100644 --- a/libcxx/include/unordered_map +++ b/libcxx/include/unordered_map @@ -2646,6 +2646,7 @@ _LIBCPP_END_NAMESPACE_STD #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include # include #endif diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set index 4786a8a8c5f18..9a25510139428 100644 --- a/libcxx/include/unordered_set +++ b/libcxx/include/unordered_set @@ -1816,6 +1816,7 @@ _LIBCPP_END_NAMESPACE_STD #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # include #endif diff --git a/libcxx/include/valarray b/libcxx/include/valarray index f28e471dfb3f9..6c33d0531cb88 100644 --- a/libcxx/include/valarray +++ b/libcxx/include/valarray @@ -4931,6 +4931,7 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include # include #endif diff --git a/libcxx/include/vector b/libcxx/include/vector index b5a97c66cf53a..d433f0c8c1969 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -3281,6 +3281,8 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include +# include +# include # include # include #endif diff --git a/libcxx/test/libcxx/diagnostics/nodiscard_extensions.pass.cpp b/libcxx/test/libcxx/diagnostics/nodiscard_extensions.compile.pass.cpp similarity index 97% rename from libcxx/test/libcxx/diagnostics/nodiscard_extensions.pass.cpp rename to libcxx/test/libcxx/diagnostics/nodiscard_extensions.compile.pass.cpp index a923301fe8b01..e0d457bd844b8 100644 --- a/libcxx/test/libcxx/diagnostics/nodiscard_extensions.pass.cpp +++ b/libcxx/test/libcxx/diagnostics/nodiscard_extensions.compile.pass.cpp @@ -7,10 +7,7 @@ 
//===----------------------------------------------------------------------===// // Test that entities declared [[nodiscard]] as at extension by libc++, are -// declared as such when _LIBCPP_DISABLE_NODISCARD_EXT is specified. - -// This test intentionally leaks memory, so it is unsupported under ASAN. -// UNSUPPORTED: asan +// not declared as such when _LIBCPP_DISABLE_NODISCARD_EXT is specified. // All entities to which libc++ applies [[nodiscard]] as an extension should // be tested here and in nodiscard_extensions.verify.cpp. They should also @@ -71,7 +68,7 @@ void test_algorithms() { std::find_if_not(std::begin(arr), std::end(arr), P()); std::find_if(std::begin(arr), std::end(arr), P()); std::find(std::begin(arr), std::end(arr), 1); - std::get_temporary_buffer(1); // intentional memory leak. + std::get_temporary_buffer(1); std::includes(std::begin(arr), std::end(arr), std::begin(arr), std::end(arr)); std::includes(std::begin(arr), std::end(arr), std::begin(arr), std::end(arr), std::greater()); diff --git a/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp new file mode 100644 index 0000000000000..1577601908c99 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.compile.pass.cpp @@ -0,0 +1,93 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Check that ranges algorithms aren't marked [[nodiscard]] when +// _LIBCPP_DISBALE_NODISCARD_EXT is defined + +// UNSUPPORTED: c++03, c++11, c++14 ,c++17 + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_NODISCARD_EXT + +#include + +void test() { + int range[1]; + int* iter = range; + auto pred = [](auto...) { return true; }; + std::ranges::adjacent_find(range); + std::ranges::adjacent_find(iter, iter); + std::ranges::all_of(range, pred); + std::ranges::all_of(iter, iter, pred); + std::ranges::any_of(range, pred); + std::ranges::any_of(iter, iter, pred); + std::ranges::binary_search(range, 1); + std::ranges::binary_search(iter, iter, 1); + std::ranges::clamp(1, 2, 3); + std::ranges::count_if(range, pred); + std::ranges::count_if(iter, iter, pred); + std::ranges::count(range, 1); + std::ranges::count(iter, iter, 1); + std::ranges::equal_range(range, 1); + std::ranges::equal_range(iter, iter, 1); + std::ranges::equal(range, range); + std::ranges::equal(iter, iter, iter, iter); + std::ranges::find_end(range, range); + std::ranges::find_end(iter, iter, iter, iter); + std::ranges::find_first_of(range, range); + std::ranges::find_first_of(iter, iter, iter, iter); + std::ranges::find_if_not(range, pred); + std::ranges::find_if_not(iter, iter, pred); + std::ranges::find_if(range, pred); + std::ranges::find_if(iter, iter, pred); + std::ranges::find(range, 1); + std::ranges::find(iter, iter, 1); + std::ranges::includes(range, range); + std::ranges::includes(iter, iter, iter, iter); + std::ranges::is_heap_until(range); + std::ranges::is_heap_until(iter, iter); + std::ranges::is_heap(range); + std::ranges::is_heap(iter, iter); + std::ranges::is_partitioned(range, pred); + std::ranges::is_partitioned(iter, iter, pred); + std::ranges::is_permutation(range, range); + std::ranges::is_permutation(iter, iter, iter, iter); + 
std::ranges::is_sorted_until(range); + std::ranges::is_sorted_until(iter, iter); + std::ranges::is_sorted(range); + std::ranges::is_sorted(iter, iter); + std::ranges::lexicographical_compare(range, range); + std::ranges::lexicographical_compare(iter, iter, iter, iter); + std::ranges::lower_bound(range, 1); + std::ranges::lower_bound(iter, iter, 1); + std::ranges::max_element(range); + std::ranges::max_element(iter, iter); + std::ranges::max(1, 2); + std::ranges::max({1, 2, 3}); + std::ranges::max(range); + std::ranges::minmax_element(range); + std::ranges::minmax_element(iter, iter); + std::ranges::minmax(1, 2); + std::ranges::minmax({1, 2, 3}); + std::ranges::minmax(range); + std::ranges::mismatch(range, range); + std::ranges::mismatch(iter, iter, iter, iter); + std::ranges::none_of(range, pred); + std::ranges::none_of(iter, iter, pred); + std::ranges::remove_if(range, pred); + std::ranges::remove_if(iter, iter, pred); + std::ranges::remove(range, 1); + std::ranges::remove(iter, iter, 1); + std::ranges::search_n(range, 1, 1); + std::ranges::search_n(iter, iter, 1, 1); + std::ranges::search(range, range); + std::ranges::search(iter, iter, iter, iter); + std::ranges::unique(range); + std::ranges::unique(iter, iter); + std::ranges::upper_bound(range, 1); + std::ranges::upper_bound(iter, iter, 1); +} diff --git a/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp new file mode 100644 index 0000000000000..77ac5f3f77903 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/ranges.nodiscard_extensions.verify.cpp @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Check that ranges algorithms are marked [[nodiscard]] as a conforming extension + +// UNSUPPORTED: c++03, c++11, c++14 ,c++17 + +#include + +void test() { + int range[1]; + int* iter = range; + auto pred = [](auto...) { return true; }; + std::ranges::adjacent_find(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::adjacent_find(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::all_of(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::all_of(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::any_of(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::any_of(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::binary_search(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::binary_search(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::clamp(1, 2, 3); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::count_if(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::count_if(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::count(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + 
std::ranges::count(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::equal_range(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::equal_range(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::equal(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::equal(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_end(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_end(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_first_of(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_first_of(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_if_not(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_if_not(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_if(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find_if(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::find(iter, iter, 1); // expected-warning {{ignoring return value of function declared 
with 'nodiscard' attribute}} + std::ranges::includes(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::includes(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_heap_until(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_heap_until(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_heap(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_heap(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_partitioned(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_partitioned(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_permutation(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_permutation(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_sorted_until(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_sorted_until(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_sorted(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::is_sorted(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::lexicographical_compare(range, range); // expected-warning 
{{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::lexicographical_compare(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::lower_bound(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::lower_bound(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max_element(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max_element(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max(1, 2); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max({1, 2, 3}); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::max(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax_element(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax_element(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax(1, 2); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax({1, 2, 3}); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::minmax(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::mismatch(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::mismatch(iter, iter, iter, iter); // expected-warning {{ignoring return value 
of function declared with 'nodiscard' attribute}} + std::ranges::none_of(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::none_of(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::remove_if(range, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::remove_if(iter, iter, pred); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::remove(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::remove(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::search_n(range, 1, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::search_n(iter, iter, 1, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::search(range, range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::search(iter, iter, iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::unique(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::unique(iter, iter); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::upper_bound(range, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + std::ranges::upper_bound(iter, iter, 1); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +} diff --git a/libcxx/test/libcxx/private_headers.verify.cpp 
b/libcxx/test/libcxx/private_headers.verify.cpp index 928ce0cd3600b..9b1120efd5234 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -427,6 +427,7 @@ END-SCRIPT #include <__memory/allocation_guard.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocation_guard.h'}} #include <__memory/allocator.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocator.h'}} #include <__memory/allocator_arg_t.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocator_arg_t.h'}} +#include <__memory/allocator_destructor.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocator_destructor.h'}} #include <__memory/allocator_traits.h> // expected-error@*:* {{use of private header from outside its module: '__memory/allocator_traits.h'}} #include <__memory/assume_aligned.h> // expected-error@*:* {{use of private header from outside its module: '__memory/assume_aligned.h'}} #include <__memory/auto_ptr.h> // expected-error@*:* {{use of private header from outside its module: '__memory/auto_ptr.h'}} diff --git a/libcxx/test/libcxx/strings/basic.string/string.cons/debug.iterator.substr.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.cons/debug.iterator.substr.pass.cpp new file mode 100644 index 0000000000000..8eeb26233acc7 --- /dev/null +++ b/libcxx/test/libcxx/strings/basic.string/string.cons/debug.iterator.substr.pass.cpp @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Check that basic_string(basic_string&&, size_type, Allocator) and +// basic_string(basic_string&&, size_type, size_type, Allocator) inserts the container into the debug database + +// REQUIRES: has-unix-headers +// UNSUPPORTED: !libcpp-has-debug-mode, c++03 + +#include +#include + +#include "check_assertion.h" + +int main(int, char**) { + using namespace std::string_literals; + + { + std::string s = {"Banane"s, 1}; + auto i = s.begin(); + assert(i[0] == 'a'); + TEST_LIBCPP_ASSERT_FAILURE(i[5], "Attempted to subscript an iterator outside its valid range"); + } + { + std::string s = {"Banane"s, 0, 5}; + auto i = s.begin(); + assert(i[0] == 'B'); + TEST_LIBCPP_ASSERT_FAILURE(i[5], "Attempted to subscript an iterator outside its valid range"); + } + { + std::string s = {"long long string so no SSO"s, 21}; + auto i = s.begin(); + assert(i[0] == 'o'); + TEST_LIBCPP_ASSERT_FAILURE(i[5], "Attempted to subscript an iterator outside its valid range"); + } + { + std::string s = {"long long string so no SSO"s, 0, 5}; + auto i = s.begin(); + assert(i[0] == 'l'); + TEST_LIBCPP_ASSERT_FAILURE(i[5], "Attempted to subscript an iterator outside its valid range"); + } +} diff --git a/libcxx/test/libcxx/strings/basic.string/string.iterators/debug.iterator.index.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.iterators/debug.iterator.index.pass.cpp index f1b4e5666569b..13a2301937309 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.iterators/debug.iterator.index.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.iterators/debug.iterator.index.pass.cpp @@ -20,6 +20,7 @@ #include "min_allocator.h" int main(int, char**) { + using T = decltype(uint8_t() - uint8_t()); { typedef std::string C; C c(1, '\0'); diff --git a/libcxx/test/libcxx/transitive_includes.sh.cpp 
b/libcxx/test/libcxx/transitive_includes.sh.cpp index edb696fb8da54..04c5debbea855 100644 --- a/libcxx/test/libcxx/transitive_includes.sh.cpp +++ b/libcxx/test/libcxx/transitive_includes.sh.cpp @@ -52,7 +52,7 @@ import re # the file and run this test. # Note that this needs to be done for all supported language versions of libc++: # for std in c++03 c++11 c++14 c++17 c++20 c++2b; do /bin/llvm-lit --param std=$std ${path_to_this_file}; done -regenerate_expected_results = True +regenerate_expected_results = False # Used because the sequence of tokens RUN : can't appear anywhere or it'll confuse Lit. RUN = "RUN" @@ -560,5 +560,6 @@ END-SCRIPT #if defined(TEST_140) #include #endif -// RUN: %{python} %S/transitive_includes_to_csv.py %t > %S/transitive_includes/%{cxx_std}.csv +// RUN: %{python} %S/transitive_includes_to_csv.py %t > %t/transitive_includes.csv +// RUN: diff -w %S/transitive_includes/%{cxx_std}.csv %t/transitive_includes.csv // GENERATED-MARKER diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 1bdb9cb7e7f71..a5f334a04f7a7 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -107,6 +107,8 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono concepts +chrono cstdint chrono ctime chrono limits chrono ratio @@ -334,7 +336,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -610,17 +611,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts @@ -649,7 +657,6 @@ random 
type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index b8b01397da9a0..6ad398a3b092c 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -107,6 +107,8 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono concepts +chrono cstdint chrono ctime chrono limits chrono ratio @@ -334,7 +336,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -611,17 +612,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts @@ -650,7 +658,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index 0a9ece5a121a9..2b13557fd2460 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -107,6 +107,8 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono concepts +chrono cstdint chrono ctime chrono limits chrono ratio @@ -336,7 +338,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -613,17 +614,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts 
ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts @@ -652,7 +660,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index 0a9ece5a121a9..2b13557fd2460 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -107,6 +107,8 @@ charconv iosfwd charconv limits charconv type_traits chrono compare +chrono concepts +chrono cstdint chrono ctime chrono limits chrono ratio @@ -336,7 +338,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -613,17 +614,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts @@ -652,7 +660,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 4589f7b98b2be..9f3d88d78c5ae 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -349,7 +349,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format 
cstdint format cstdlib @@ -624,17 +623,24 @@ optional typeinfo optional utility optional variant optional version +ostream atomic ostream bitset +ostream concepts ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream iterator ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare queue concepts @@ -663,7 +669,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list diff --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv index e4e82d3cf4319..b00cba3aa7a8f 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx2b.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx2b.csv @@ -1,6 +1,5 @@ algorithm bit algorithm climits -algorithm concepts algorithm cstddef algorithm cstdint algorithm cstdlib @@ -11,14 +10,11 @@ algorithm limits algorithm new algorithm type_traits algorithm version -any atomic -any concepts any cstddef any cstdint any cstdlib any cstring any initializer_list -any iosfwd any limits any new any stdexcept @@ -26,7 +22,6 @@ any type_traits any typeinfo any version array compare -array concepts array cstddef array cstdlib array initializer_list @@ -59,7 +54,6 @@ bit limits bit type_traits bit version bitset climits -bitset concepts bitset cstddef bitset cstdint bitset cstdlib @@ -75,7 +69,6 @@ bitset version ccomplex complex charconv cerrno charconv cmath -charconv concepts charconv cstddef charconv cstdint charconv cstdlib @@ -88,7 +81,6 @@ chrono bit chrono charconv chrono cmath chrono compare -chrono concepts chrono cstddef chrono cstdint chrono cstdlib @@ -109,22 +101,12 @@ chrono version cinttypes cstdint cmath type_traits cmath version -codecvt atomic codecvt cctype -codecvt concepts codecvt cstddef codecvt cstdint -codecvt cstdlib codecvt cstring 
-codecvt initializer_list -codecvt iosfwd -codecvt limits codecvt mutex -codecvt new -codecvt stdexcept codecvt string -codecvt type_traits -codecvt typeinfo codecvt version compare cmath compare cstddef @@ -142,7 +124,6 @@ concepts cstddef concepts type_traits concepts version condition_variable atomic -condition_variable concepts condition_variable cstddef condition_variable cstdint condition_variable cstdlib @@ -169,21 +150,17 @@ ctgmath ccomplex ctgmath cmath cwchar cwctype cwctype cctype -deque atomic deque compare -deque concepts deque cstddef deque cstdint deque cstdlib deque cstring deque initializer_list -deque iosfwd deque limits deque new deque stdexcept deque tuple deque type_traits -deque typeinfo deque version exception cstddef exception cstdlib @@ -248,7 +225,6 @@ experimental/vector experimental/memory_resource experimental/vector vector ext/hash_map algorithm ext/hash_map cmath -ext/hash_map concepts ext/hash_map cstddef ext/hash_map cstdint ext/hash_map cstring @@ -261,7 +237,6 @@ ext/hash_map string ext/hash_map type_traits ext/hash_set algorithm ext/hash_set cmath -ext/hash_set concepts ext/hash_set cstddef ext/hash_set cstdint ext/hash_set cstring @@ -273,7 +248,6 @@ ext/hash_set string ext/hash_set type_traits filesystem cerrno filesystem compare -filesystem concepts filesystem cstddef filesystem cstdint filesystem cstdlib @@ -292,7 +266,6 @@ filesystem version format array format bit format charconv -format concepts format cstddef format cstdint format cstdlib @@ -306,25 +279,19 @@ format string format string_view format type_traits format version -forward_list atomic forward_list compare -forward_list concepts forward_list cstddef forward_list cstdint forward_list cstdlib forward_list cstring forward_list initializer_list -forward_list iosfwd forward_list limits forward_list new forward_list stdexcept forward_list tuple forward_list type_traits -forward_list typeinfo forward_list version -fstream atomic fstream cctype -fstream concepts 
fstream cstddef fstream cstdint fstream cstdio @@ -332,20 +299,15 @@ fstream cstdlib fstream cstring fstream filesystem fstream initializer_list -fstream iosfwd fstream istream -fstream limits fstream mutex -fstream new fstream ostream -fstream stdexcept fstream string fstream type_traits fstream typeinfo fstream version functional array functional atomic -functional concepts functional cstddef functional cstdint functional cstdlib @@ -378,21 +340,13 @@ iomanip istream iomanip version ios atomic ios cctype -ios concepts ios cstddef ios cstdint -ios cstdlib ios cstring -ios initializer_list ios iosfwd -ios limits ios mutex -ios new -ios stdexcept ios string ios system_error -ios type_traits -ios typeinfo ios version iosfwd version iostream ios @@ -400,7 +354,6 @@ iostream istream iostream ostream iostream streambuf iostream version -istream concepts istream cstddef istream iosfwd istream ostream @@ -421,25 +374,19 @@ latch limits latch version limits type_traits limits version -list atomic list compare -list concepts list cstddef list cstdint list cstdlib list cstring list initializer_list -list iosfwd list limits list new list stdexcept list tuple list type_traits -list typeinfo list version -locale atomic locale cctype -locale concepts locale cstddef locale cstdint locale cstdio @@ -452,14 +399,11 @@ locale iosfwd locale limits locale mutex locale new -locale stdexcept locale streambuf locale string locale type_traits -locale typeinfo locale version map compare -map concepts map cstddef map cstdlib map initializer_list @@ -472,7 +416,6 @@ map type_traits map version memory atomic memory compare -memory concepts memory cstddef memory cstdint memory cstdlib @@ -495,7 +438,6 @@ memory_resource stdexcept memory_resource tuple memory_resource version mutex atomic -mutex concepts mutex cstddef mutex cstdint mutex cstdlib @@ -516,11 +458,9 @@ new cstdlib new exception new type_traits new version -numbers concepts numbers type_traits numbers version numeric cmath -numeric 
concepts numeric cstddef numeric limits numeric type_traits @@ -535,19 +475,24 @@ optional new optional stdexcept optional type_traits optional version +ostream atomic ostream bitset ostream cstddef ostream cstdint +ostream cstdlib ostream cstring +ostream initializer_list ostream ios +ostream iosfwd ostream limits ostream locale ostream new +ostream stdexcept ostream streambuf ostream type_traits +ostream typeinfo ostream version queue compare -queue concepts queue cstddef queue cstdlib queue deque @@ -559,7 +504,6 @@ queue version random bit random climits random cmath -random concepts random cstddef random cstdint random cstdlib @@ -572,7 +516,6 @@ random type_traits random vector random version ranges compare -ranges concepts ranges cstddef ranges cstdlib ranges initializer_list @@ -590,24 +533,19 @@ ratio climits ratio cstdint ratio type_traits ratio version -regex atomic regex cctype regex compare -regex concepts regex cstddef regex cstdint regex cstdlib regex cstring regex deque regex initializer_list -regex iosfwd regex limits regex mutex -regex new regex stdexcept regex string regex type_traits -regex typeinfo regex vector regex version scoped_allocator cstddef @@ -624,7 +562,6 @@ semaphore ratio semaphore type_traits semaphore version set compare -set concepts set cstddef set cstdlib set initializer_list @@ -643,7 +580,6 @@ shared_mutex system_error shared_mutex type_traits shared_mutex version span array -span concepts span cstddef span initializer_list span limits @@ -656,7 +592,6 @@ sstream string sstream type_traits sstream version stack compare -stack concepts stack cstddef stack deque stack initializer_list @@ -671,7 +606,6 @@ streambuf iosfwd streambuf version string climits string compare -string concepts string cstddef string cstdint string cstdio @@ -688,7 +622,6 @@ string tuple string type_traits string version string_view compare -string_view concepts string_view cstddef string_view cstdint string_view cstdio @@ -744,7 +677,6 @@ typeinfo 
exception typeinfo type_traits unordered_map cmath unordered_map compare -unordered_map concepts unordered_map cstddef unordered_map cstdint unordered_map cstdlib @@ -759,7 +691,6 @@ unordered_map type_traits unordered_map version unordered_set cmath unordered_set compare -unordered_set concepts unordered_set cstddef unordered_set cstdint unordered_set cstdlib @@ -780,7 +711,6 @@ utility limits utility type_traits utility version valarray cmath -valarray concepts valarray cstddef valarray cstdlib valarray cstring @@ -801,10 +731,8 @@ variant new variant tuple variant type_traits variant version -vector atomic vector climits vector compare -vector concepts vector cstddef vector cstdint vector cstdlib @@ -816,5 +744,4 @@ vector new vector stdexcept vector tuple vector type_traits -vector typeinfo vector version diff --git a/libcxx/test/libcxx/utilities/function.objects/func.blocks.arc.pass.mm b/libcxx/test/libcxx/utilities/function.objects/func.blocks.arc.pass.mm index 186fe22e6e476..c9ace62000be3 100644 --- a/libcxx/test/libcxx/utilities/function.objects/func.blocks.arc.pass.mm +++ b/libcxx/test/libcxx/utilities/function.objects/func.blocks.arc.pass.mm @@ -12,7 +12,7 @@ // This test requires the Blocks runtime, which is (only?) available on Darwin // out-of-the-box. 
-// REQUIRES: has-fblocks && darwin +// REQUIRES: has-fblocks && has-fobjc-arc && darwin // ADDITIONAL_COMPILE_FLAGS: -fblocks -fobjc-arc diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/assert.ranges_clamp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/assert.ranges_clamp.pass.cpp index b255b5933a49c..1ea5199a6fd21 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/assert.ranges_clamp.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/assert.ranges_clamp.pass.cpp @@ -21,14 +21,14 @@ #include "check_assertion.h" int main(int, char**) { - std::ranges::clamp(1, 2, 0, std::ranges::greater{}); + (void)std::ranges::clamp(1, 2, 0, std::ranges::greater{}); TEST_LIBCPP_ASSERT_FAILURE(std::ranges::clamp(1, 2, 0), "Bad bounds passed to std::ranges::clamp"); - std::ranges::clamp(1, 0, 2); - TEST_LIBCPP_ASSERT_FAILURE(std::ranges::clamp(1, 0, 2, std::ranges::greater{}), - "Bad bounds passed to std::ranges::clamp"); + (void)std::ranges::clamp(1, 0, 2); + TEST_LIBCPP_ASSERT_FAILURE( + std::ranges::clamp(1, 0, 2, std::ranges::greater{}), "Bad bounds passed to std::ranges::clamp"); - std::ranges::clamp(1, 1, 1); // Equal bounds should be fine. + (void)std::ranges::clamp(1, 1, 1); // Equal bounds should be fine. 
return 0; } diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/ranges.clamp.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/ranges.clamp.pass.cpp index 553ffb9d4b487..9da52e2772a9e 100644 --- a/libcxx/test/std/algorithms/alg.sorting/alg.clamp/ranges.clamp.pass.cpp +++ b/libcxx/test/std/algorithms/alg.sorting/alg.clamp/ranges.clamp.pass.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include diff --git a/libcxx/test/std/algorithms/ranges_robust_against_differing_projections.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_differing_projections.pass.cpp index 64b3b9db0c30d..d940a7ab97006 100644 --- a/libcxx/test/std/algorithms/ranges_robust_against_differing_projections.pass.cpp +++ b/libcxx/test/std/algorithms/ranges_robust_against_differing_projections.pass.cpp @@ -24,8 +24,8 @@ // (in1, in2, ...) template constexpr void test(Func&& func, Input1& in1, Input2& in2, Args&& ...args) { - func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); - func(in1, in2, std::forward(args)...); + (void)func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); + (void)func(in1, in2, std::forward(args)...); } constexpr bool test_all() { diff --git a/libcxx/test/std/algorithms/ranges_robust_against_nonbool_predicates.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_nonbool_predicates.pass.cpp index 448b8bc018ace..b69df4084052b 100644 --- a/libcxx/test/std/algorithms/ranges_robust_against_nonbool_predicates.pass.cpp +++ b/libcxx/test/std/algorithms/ranges_robust_against_nonbool_predicates.pass.cpp @@ -36,22 +36,22 @@ static_assert(std::convertible_to); // (in, ...) template constexpr void test(Func&& func, Input& in, Args&&... args) { - func(in.begin(), in.end(), std::forward(args)...); - func(in, std::forward(args)...); + (void)func(in.begin(), in.end(), std::forward(args)...); + (void)func(in, std::forward(args)...); } // (in1, in2, ...) 
template constexpr void test(Func&& func, Input& in1, Input& in2, Args&&... args) { - func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); - func(in1, in2, std::forward(args)...); + (void)func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); + (void)func(in1, in2, std::forward(args)...); } // (in, mid, ...) template constexpr void test_mid(Func&& func, Input& in, std::ranges::iterator_t mid, Args&&... args) { - func(in.begin(), mid, in.end(), std::forward(args)...); - func(in, mid, std::forward(args)...); + (void)func(in.begin(), mid, in.end(), std::forward(args)...); + (void)func(in, mid, std::forward(args)...); } constexpr bool test_all() { @@ -83,17 +83,17 @@ constexpr bool test_all() { test(std::ranges::binary_search, in, x, binary_pred); // min - std::ranges::min(1, 2, binary_pred); - std::ranges::min(std::initializer_list{1, 2}, binary_pred); - std::ranges::min(in, binary_pred); + (void)std::ranges::min(1, 2, binary_pred); + (void)std::ranges::min(std::initializer_list{1, 2}, binary_pred); + (void)std::ranges::min(in, binary_pred); // max - std::ranges::max(1, 2, binary_pred); - std::ranges::max(std::initializer_list{1, 2}, binary_pred); - std::ranges::max(in, binary_pred); + (void)std::ranges::max(1, 2, binary_pred); + (void)std::ranges::max(std::initializer_list{1, 2}, binary_pred); + (void)std::ranges::max(in, binary_pred); // minmax - std::ranges::minmax(1, 2, binary_pred); - std::ranges::minmax(std::initializer_list{1, 2}, binary_pred); - std::ranges::minmax(in, binary_pred); + (void)std::ranges::minmax(1, 2, binary_pred); + (void)std::ranges::minmax(std::initializer_list{1, 2}, binary_pred); + (void)std::ranges::minmax(in, binary_pred); test(std::ranges::min_element, in, binary_pred); test(std::ranges::max_element, in, binary_pred); @@ -108,7 +108,7 @@ constexpr bool test_all() { test(std::ranges::includes, in, in2, binary_pred); test(std::ranges::is_heap, in, binary_pred); test(std::ranges::is_heap_until, 
in, binary_pred); - std::ranges::clamp(2, 1, 3, binary_pred); + (void)std::ranges::clamp(2, 1, 3, binary_pred); test(std::ranges::is_permutation, in, in2, binary_pred); test(std::ranges::copy_if, in, out, unary_pred); test(std::ranges::remove_copy_if, in, out, unary_pred); diff --git a/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp index 9831020877579..35b9f928b739f 100644 --- a/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp +++ b/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp @@ -37,22 +37,22 @@ struct Bar { // (in, ...) template constexpr void test(Func&& func, Input& in, Args&&... args) { - func(in.begin(), in.end(), std::forward(args)...); - func(in, std::forward(args)...); + (void)func(in.begin(), in.end(), std::forward(args)...); + (void)func(in, std::forward(args)...); } // (in1, in2, ...) template constexpr void test(Func&& func, Input& in1, Input& in2, Args&&... args) { - func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); - func(in1, in2, std::forward(args)...); + (void)func(in1.begin(), in1.end(), in2.begin(), in2.end(), std::forward(args)...); + (void)func(in1, in2, std::forward(args)...); } // (in, mid, ...) template constexpr void test_mid(Func&& func, Input& in, std::ranges::iterator_t mid, Args&&... 
args) { - func(in.begin(), mid, in.end(), std::forward(args)...); - func(in, mid, std::forward(args)...); + (void)func(in.begin(), mid, in.end(), std::forward(args)...); + (void)func(in, mid, std::forward(args)...); } constexpr bool test_all() { @@ -89,17 +89,17 @@ constexpr bool test_all() { test(std::ranges::binary_search, in, x, &Foo::binary_pred, &Bar::val); // min - std::ranges::min(a, b, &Foo::binary_pred, &Bar::val); - std::ranges::min(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); - std::ranges::min(in, &Foo::binary_pred, &Bar::val); + (void)std::ranges::min(a, b, &Foo::binary_pred, &Bar::val); + (void)std::ranges::min(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); + (void)std::ranges::min(in, &Foo::binary_pred, &Bar::val); // max - std::ranges::max(a, b, &Foo::binary_pred, &Bar::val); - std::ranges::max(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); - std::ranges::max(in, &Foo::binary_pred, &Bar::val); + (void)std::ranges::max(a, b, &Foo::binary_pred, &Bar::val); + (void)std::ranges::max(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); + (void)std::ranges::max(in, &Foo::binary_pred, &Bar::val); // minmax - std::ranges::minmax(a, b, &Foo::binary_pred, &Bar::val); - std::ranges::minmax(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); - std::ranges::minmax(in, &Foo::binary_pred, &Bar::val); + (void)std::ranges::minmax(a, b, &Foo::binary_pred, &Bar::val); + (void)std::ranges::minmax(std::initializer_list{a, b}, &Foo::binary_pred, &Bar::val); + (void)std::ranges::minmax(in, &Foo::binary_pred, &Bar::val); test(std::ranges::min_element, in, &Foo::binary_pred, &Bar::val); test(std::ranges::max_element, in, &Foo::binary_pred, &Bar::val); @@ -115,7 +115,7 @@ constexpr bool test_all() { test(std::ranges::includes, in, in2, &Foo::binary_pred, &Bar::val, &Bar::val); test(std::ranges::is_heap, in, &Foo::binary_pred, &Bar::val); test(std::ranges::is_heap_until, in, &Foo::binary_pred, &Bar::val); - 
std::ranges::clamp(b, a, c, &Foo::binary_pred, &Bar::val); + (void)std::ranges::clamp(b, a, c, &Foo::binary_pred, &Bar::val); test(std::ranges::is_permutation, in, in2, &Foo::binary_pred, &Bar::val, &Bar::val); test(std::ranges::for_each, in, &Foo::unary_pred, &Bar::val); std::ranges::for_each_n(in.begin(), count, &Foo::unary_pred, &Bar::val); diff --git a/libcxx/test/std/algorithms/ranges_robust_against_proxy_iterators.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_proxy_iterators.pass.cpp index 3458336aefb8a..d383e18adbb99 100644 --- a/libcxx/test/std/algorithms/ranges_robust_against_proxy_iterators.pass.cpp +++ b/libcxx/test/std/algorithms/ranges_robust_against_proxy_iterators.pass.cpp @@ -29,22 +29,22 @@ // (in, ...) template constexpr void test(Func&& func, Input& in, Args&& ...args) { - func(in.begin(), in.end(), std::forward(args)...); - func(in, std::forward(args)...); + (void)func(in.begin(), in.end(), std::forward(args)...); + (void)func(in, std::forward(args)...); } // (in1, in2, ...) template constexpr void test(Func&& func, Range1& r1, Range2& r2, Args&& ...args) { - func(r1.begin(), r1.end(), r2.begin(), r2.end(), std::forward(args)...); - func(r1, r2, std::forward(args)...); + (void)func(r1.begin(), r1.end(), r2.begin(), r2.end(), std::forward(args)...); + (void)func(r1, r2, std::forward(args)...); } // (in, mid, ...) 
template constexpr void test_mid(Func&& func, Input& in, std::ranges::iterator_t mid, Args&& ...args) { - func(in.begin(), mid, in.end(), std::forward(args)...); - func(in, mid, std::forward(args)...); + (void)func(in.begin(), mid, in.end(), std::forward(args)...); + (void)func(in, mid, std::forward(args)...); } std::mt19937 rand_gen() { return std::mt19937(); } diff --git a/libcxx/test/std/depr/depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp index e2565641aba66..a02a5bbbdbcc7 100644 --- a/libcxx/test/std/depr/depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/stdlib_h.aligned_alloc.compile.pass.cpp @@ -18,6 +18,9 @@ // ::aligned_alloc is not implemented on Windows // XFAIL: target={{.+}}-windows-{{.+}} +// ::aligned_alloc is available starting with Android P (API 28) +// XFAIL: target={{.+}}-android{{(eabi)?(21|22|23|24|25|26|27)}} + #include #include diff --git a/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp b/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp index 25019711a71eb..1a9289c7c966c 100644 --- a/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp +++ b/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp @@ -17,6 +17,8 @@ // ARM/AArch64 MinGW also has got long double equal to regular double, just // like MSVC (thus match both MinGW and MSVC here, for those architectures). // UNSUPPORTED: target={{aarch64|armv7}}-{{.*}}-windows-{{.+}} +// Android's 32-bit x86 target has long double equal to regular double. 
+// UNSUPPORTED: target=i686-{{.+}}-android{{.*}} // diff --git a/libcxx/test/std/language.support/support.runtime/cstdlib.aligned_alloc.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdlib.aligned_alloc.compile.pass.cpp index 70bf3ecd539f0..f6681db6cf6bb 100644 --- a/libcxx/test/std/language.support/support.runtime/cstdlib.aligned_alloc.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/cstdlib.aligned_alloc.compile.pass.cpp @@ -18,6 +18,9 @@ // ::aligned_alloc is not implemented on Windows // XFAIL: target={{.+}}-windows-{{.+}} +// ::aligned_alloc is available starting with Android P (API 28) +// XFAIL: target={{.+}}-android{{(eabi)?(21|22|23|24|25|26|27)}} + #include #include diff --git a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp index 111d0912cbbe2..37c7b67234782 100644 --- a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp @@ -17,6 +17,9 @@ // unavailable until macOS 10.15 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} +// ::timespec_get is available starting with Android Q (API 29) +// XFAIL: target={{.+}}-android{{(eabi)?(21|22|23|24|25|26|27|28)}} + #include #include diff --git a/libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp new file mode 100644 index 0000000000000..6a431be5cf851 --- /dev/null +++ b/libcxx/test/std/strings/basic.string/string.cons/substr_rvalue.pass.cpp @@ -0,0 +1,233 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// constexpr basic_string(basic_string&& str, size_type pos, const Allocator& a = Allocator()); +// constexpr basic_string(basic_string&& str, size_type pos, size_type n, const Allocator& a = Allocator()); + +#include +#include + +#include "constexpr_char_traits.h" +#include "count_new.h" +#include "make_string.h" +#include "min_allocator.h" +#include "test_allocator.h" +#include "test_macros.h" + +#define STR(string) MAKE_CSTRING(typename S::value_type, string) + +constexpr struct should_throw_exception_t { +} should_throw_exception; + +template +constexpr void test_string_pos(S orig, typename S::size_type pos, S expected) { +#ifdef _LIBCPP_VERSION + ConstexprDisableAllocationGuard g; +#endif + S substr(std::move(orig), pos); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(orig.empty()); + LIBCPP_ASSERT(substr.__invariants()); + assert(substr == expected); +} + +template +constexpr void test_string_pos(S orig, typename S::size_type pos, should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + [[maybe_unused]] S substr = S(std::move(orig), pos); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; +#endif +} + +template +constexpr void +test_string_pos_alloc(S orig, typename S::size_type pos, const typename S::allocator_type& alloc, S expected) { + S substr(std::move(orig), pos, alloc); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(substr.__invariants()); + assert(substr == expected); + assert(substr.get_allocator() == alloc); +} + +template +constexpr void test_string_pos_alloc( + S orig, typename S::size_type pos, const typename S::allocator_type& alloc, should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) 
{ + try { + [[maybe_unused]] S substr = S(std::move(orig), pos, alloc); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; + (void)alloc; +#endif +} + +template +constexpr void test_string_pos_n(S orig, typename S::size_type pos, typename S::size_type n, S expected) { +#ifdef _LIBCPP_VERSION + ConstexprDisableAllocationGuard g; +#endif + S substr(std::move(orig), pos, n); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(orig.empty()); + LIBCPP_ASSERT(substr.__invariants()); + assert(substr == expected); +} + +template +constexpr void test_string_pos_n(S orig, typename S::size_type pos, typename S::size_type n, should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + [[maybe_unused]] S substr = S(std::move(orig), pos, n); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; + (void)n; +#endif +} + +template +constexpr void test_string_pos_n_alloc( + S orig, typename S::size_type pos, typename S::size_type n, const typename S::allocator_type& alloc, S expected) { + S substr(std::move(orig), pos, n, alloc); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(substr.__invariants()); + assert(substr == expected); + assert(substr.get_allocator() == alloc); +} + +template +constexpr void test_string_pos_n_alloc( + S orig, + typename S::size_type pos, + typename S::size_type n, + const typename S::allocator_type& alloc, + should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + [[maybe_unused]] S substr = S(std::move(orig), pos, n, alloc); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; + (void)n; + (void)alloc; +#endif +} + +template +constexpr void test_string(const typename S::allocator_type& alloc) { + test_string_pos(STR(""), 0, STR("")); + test_string_pos(STR(""), 1, should_throw_exception); + 
test_string_pos(STR("Banane"), 1, STR("anane")); + test_string_pos(STR("Banane"), 6, STR("")); + test_string_pos(STR("Banane"), 7, should_throw_exception); + test_string_pos(STR("long long string so no SSO"), 0, STR("long long string so no SSO")); + test_string_pos(STR("long long string so no SSO"), 10, STR("string so no SSO")); + test_string_pos(STR("long long string so no SSO"), 26, STR("")); + test_string_pos(STR("long long string so no SSO"), 27, should_throw_exception); + + test_string_pos_alloc(STR(""), 0, alloc, STR("")); + test_string_pos_alloc(STR(""), 1, alloc, should_throw_exception); + test_string_pos_alloc(STR("Banane"), 1, alloc, STR("anane")); + test_string_pos_alloc(STR("Banane"), 6, alloc, STR("")); + test_string_pos_alloc(STR("Banane"), 7, alloc, should_throw_exception); + test_string_pos_alloc(STR("long long string so no SSO"), 0, alloc, STR("long long string so no SSO")); + test_string_pos_alloc(STR("long long string so no SSO"), 10, alloc, STR("string so no SSO")); + test_string_pos_alloc(STR("long long string so no SSO"), 26, alloc, STR("")); + test_string_pos_alloc(STR("long long string so no SSO"), 27, alloc, should_throw_exception); + + test_string_pos_n(STR(""), 0, 0, STR("")); + test_string_pos_n(STR(""), 0, 1, STR("")); + test_string_pos_n(STR(""), 1, 0, should_throw_exception); + test_string_pos_n(STR(""), 1, 1, should_throw_exception); + test_string_pos_n(STR("Banane"), 1, 10, STR("anane")); + test_string_pos_n(STR("Banane"), 6, 0, STR("")); + test_string_pos_n(STR("Banane"), 6, 5, STR("")); + test_string_pos_n(STR("Banane"), 7, 10, should_throw_exception); + test_string_pos_n(STR("long long string so no SSO"), 0, 10, STR("long long ")); + test_string_pos_n(STR("long long string so no SSO"), 10, 8, STR("string s")); + test_string_pos_n(STR("long long string so no SSO"), 20, 10, STR("no SSO")); + test_string_pos_n(STR("long long string so no SSO"), 26, 10, STR("")); + test_string_pos_n(STR("long long string so no SSO"), 27, 10, 
should_throw_exception); + + test_string_pos_n_alloc(STR(""), 0, 0, alloc, STR("")); + test_string_pos_n_alloc(STR(""), 0, 1, alloc, STR("")); + test_string_pos_n_alloc(STR(""), 1, 0, alloc, should_throw_exception); + test_string_pos_n_alloc(STR(""), 1, 1, alloc, should_throw_exception); + test_string_pos_n_alloc(STR("Banane"), 1, 10, alloc, STR("anane")); + test_string_pos_n_alloc(STR("Banane"), 6, 0, alloc, STR("")); + test_string_pos_n_alloc(STR("Banane"), 6, 5, alloc, STR("")); + test_string_pos_n_alloc(STR("Banane"), 7, 10, alloc, should_throw_exception); + test_string_pos_n_alloc(STR("long long string so no SSO"), 0, 10, alloc, STR("long long ")); + test_string_pos_n_alloc(STR("long long string so no SSO"), 10, 8, alloc, STR("string s")); + test_string_pos_n_alloc(STR("long long string so no SSO"), 20, 10, alloc, STR("no SSO")); + test_string_pos_n_alloc(STR("long long string so no SSO"), 26, 10, alloc, STR("")); + test_string_pos_n_alloc(STR("long long string so no SSO"), 27, 10, alloc, should_throw_exception); +} + +template +constexpr void test_allocators() { + test_string>>(std::allocator{}); + test_string>>(min_allocator{}); + test_string>>(test_allocator{42}); +} + +template +constexpr bool test_char_traits() { + test_allocators>(); + test_allocators>(); + + return true; +} + +int main(int, char**) { + // TODO: put these into a single function when we increase the constexpr step limit + test_char_traits(); + static_assert(test_char_traits()); + test_char_traits(); + static_assert(test_char_traits()); + test_char_traits(); + static_assert(test_char_traits()); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test_char_traits(); + static_assert(test_char_traits()); +#endif +#ifndef TEST_HAS_NO_CHAR8_T + test_char_traits(); + static_assert(test_char_traits()); +#endif + + return 0; +} diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp index 
4ae469d597a0f..7f6404abd8261 100644 --- a/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr.pass.cpp @@ -8,7 +8,8 @@ // -// basic_string substr(size_type pos = 0, size_type n = npos) const; // constexpr since C++20 +// basic_string substr(size_type pos = 0, size_type n = npos) const; // constexpr since C++20, removed in C++23 +// basic_string substr(size_type pos = 0, size_type n = npos) const&; // since in C++23 #include #include @@ -47,130 +48,72 @@ test(const S& s, typename S::size_type pos, typename S::size_type n) #endif } +template +TEST_CONSTEXPR_CXX20 void test_string() { + test(S(""), 0, 0); + test(S(""), 1, 0); + test(S("pniot"), 0, 0); + test(S("htaob"), 0, 1); + test(S("fodgq"), 0, 2); + test(S("hpqia"), 0, 4); + test(S("qanej"), 0, 5); + test(S("dfkap"), 1, 0); + test(S("clbao"), 1, 1); + test(S("ihqrf"), 1, 2); + test(S("mekdn"), 1, 3); + test(S("ngtjf"), 1, 4); + test(S("srdfq"), 2, 0); + test(S("qkdrs"), 2, 1); + test(S("ikcrq"), 2, 2); + test(S("cdaih"), 2, 3); + test(S("dmajb"), 4, 0); + test(S("karth"), 4, 1); + test(S("lhcdo"), 5, 0); + test(S("acbsj"), 6, 0); + test(S("pbsjikaole"), 0, 0); + test(S("pcbahntsje"), 0, 1); + test(S("mprdjbeiak"), 0, 5); + test(S("fhepcrntko"), 0, 9); + test(S("eqmpaidtls"), 0, 10); + test(S("joidhalcmq"), 1, 0); + test(S("omigsphflj"), 1, 1); + test(S("kocgbphfji"), 1, 4); + test(S("onmjekafbi"), 1, 8); + test(S("fbslrjiqkm"), 1, 9); + test(S("oqmrjahnkg"), 5, 0); + test(S("jeidpcmalh"), 5, 1); + test(S("schfalibje"), 5, 2); + test(S("crliponbqe"), 5, 4); + test(S("igdscopqtm"), 5, 5); + test(S("qngpdkimlc"), 9, 0); + test(S("thdjgafrlb"), 9, 1); + test(S("hcjitbfapl"), 10, 0); + test(S("mgojkldsqh"), 11, 0); + test(S("gfshlcmdjreqipbontak"), 0, 0); + test(S("nadkhpfemgclosibtjrq"), 0, 1); + test(S("nkodajteqplrbifhmcgs"), 0, 10); + test(S("ofdrqmkeblthacpgijsn"), 0, 19); + test(S("gbmetiprqdoasckjfhln"), 0, 20); 
+ test(S("bdfjqgatlksriohemnpc"), 1, 0); + test(S("crnklpmegdqfiashtojb"), 1, 1); + test(S("ejqcnahdrkfsmptilgbo"), 1, 9); + test(S("jsbtafedocnirgpmkhql"), 1, 18); + test(S("prqgnlbaejsmkhdctoif"), 1, 19); + test(S("qnmodrtkebhpasifgcjl"), 10, 0); + test(S("pejafmnokrqhtisbcdgl"), 10, 1); + test(S("cpebqsfmnjdolhkratgi"), 10, 5); + test(S("odnqkgijrhabfmcestlp"), 10, 9); + test(S("lmofqdhpkibagnrcjste"), 10, 10); + test(S("lgjqketopbfahrmnsicd"), 19, 0); + test(S("ktsrmnqagdecfhijpobl"), 19, 1); + test(S("lsaijeqhtrbgcdmpfkno"), 20, 0); + test(S("dplqartnfgejichmoskb"), 21, 0); +} + TEST_CONSTEXPR_CXX20 bool test() { - { - typedef std::string S; - test(S(""), 0, 0); - test(S(""), 1, 0); - test(S("pniot"), 0, 0); - test(S("htaob"), 0, 1); - test(S("fodgq"), 0, 2); - test(S("hpqia"), 0, 4); - test(S("qanej"), 0, 5); - test(S("dfkap"), 1, 0); - test(S("clbao"), 1, 1); - test(S("ihqrf"), 1, 2); - test(S("mekdn"), 1, 3); - test(S("ngtjf"), 1, 4); - test(S("srdfq"), 2, 0); - test(S("qkdrs"), 2, 1); - test(S("ikcrq"), 2, 2); - test(S("cdaih"), 2, 3); - test(S("dmajb"), 4, 0); - test(S("karth"), 4, 1); - test(S("lhcdo"), 5, 0); - test(S("acbsj"), 6, 0); - test(S("pbsjikaole"), 0, 0); - test(S("pcbahntsje"), 0, 1); - test(S("mprdjbeiak"), 0, 5); - test(S("fhepcrntko"), 0, 9); - test(S("eqmpaidtls"), 0, 10); - test(S("joidhalcmq"), 1, 0); - test(S("omigsphflj"), 1, 1); - test(S("kocgbphfji"), 1, 4); - test(S("onmjekafbi"), 1, 8); - test(S("fbslrjiqkm"), 1, 9); - test(S("oqmrjahnkg"), 5, 0); - test(S("jeidpcmalh"), 5, 1); - test(S("schfalibje"), 5, 2); - test(S("crliponbqe"), 5, 4); - test(S("igdscopqtm"), 5, 5); - test(S("qngpdkimlc"), 9, 0); - test(S("thdjgafrlb"), 9, 1); - test(S("hcjitbfapl"), 10, 0); - test(S("mgojkldsqh"), 11, 0); - test(S("gfshlcmdjreqipbontak"), 0, 0); - test(S("nadkhpfemgclosibtjrq"), 0, 1); - test(S("nkodajteqplrbifhmcgs"), 0, 10); - test(S("ofdrqmkeblthacpgijsn"), 0, 19); - test(S("gbmetiprqdoasckjfhln"), 0, 20); - test(S("bdfjqgatlksriohemnpc"), 
1, 0); - test(S("crnklpmegdqfiashtojb"), 1, 1); - test(S("ejqcnahdrkfsmptilgbo"), 1, 9); - test(S("jsbtafedocnirgpmkhql"), 1, 18); - test(S("prqgnlbaejsmkhdctoif"), 1, 19); - test(S("qnmodrtkebhpasifgcjl"), 10, 0); - test(S("pejafmnokrqhtisbcdgl"), 10, 1); - test(S("cpebqsfmnjdolhkratgi"), 10, 5); - test(S("odnqkgijrhabfmcestlp"), 10, 9); - test(S("lmofqdhpkibagnrcjste"), 10, 10); - test(S("lgjqketopbfahrmnsicd"), 19, 0); - test(S("ktsrmnqagdecfhijpobl"), 19, 1); - test(S("lsaijeqhtrbgcdmpfkno"), 20, 0); - test(S("dplqartnfgejichmoskb"), 21, 0); - } + test_string(); #if TEST_STD_VER >= 11 - { - typedef std::basic_string, min_allocator> S; - test(S(""), 0, 0); - test(S(""), 1, 0); - test(S("pniot"), 0, 0); - test(S("htaob"), 0, 1); - test(S("fodgq"), 0, 2); - test(S("hpqia"), 0, 4); - test(S("qanej"), 0, 5); - test(S("dfkap"), 1, 0); - test(S("clbao"), 1, 1); - test(S("ihqrf"), 1, 2); - test(S("mekdn"), 1, 3); - test(S("ngtjf"), 1, 4); - test(S("srdfq"), 2, 0); - test(S("qkdrs"), 2, 1); - test(S("ikcrq"), 2, 2); - test(S("cdaih"), 2, 3); - test(S("dmajb"), 4, 0); - test(S("karth"), 4, 1); - test(S("lhcdo"), 5, 0); - test(S("acbsj"), 6, 0); - test(S("pbsjikaole"), 0, 0); - test(S("pcbahntsje"), 0, 1); - test(S("mprdjbeiak"), 0, 5); - test(S("fhepcrntko"), 0, 9); - test(S("eqmpaidtls"), 0, 10); - test(S("joidhalcmq"), 1, 0); - test(S("omigsphflj"), 1, 1); - test(S("kocgbphfji"), 1, 4); - test(S("onmjekafbi"), 1, 8); - test(S("fbslrjiqkm"), 1, 9); - test(S("oqmrjahnkg"), 5, 0); - test(S("jeidpcmalh"), 5, 1); - test(S("schfalibje"), 5, 2); - test(S("crliponbqe"), 5, 4); - test(S("igdscopqtm"), 5, 5); - test(S("qngpdkimlc"), 9, 0); - test(S("thdjgafrlb"), 9, 1); - test(S("hcjitbfapl"), 10, 0); - test(S("mgojkldsqh"), 11, 0); - test(S("gfshlcmdjreqipbontak"), 0, 0); - test(S("nadkhpfemgclosibtjrq"), 0, 1); - test(S("nkodajteqplrbifhmcgs"), 0, 10); - test(S("ofdrqmkeblthacpgijsn"), 0, 19); - test(S("gbmetiprqdoasckjfhln"), 0, 20); - test(S("bdfjqgatlksriohemnpc"), 1, 0); - 
test(S("crnklpmegdqfiashtojb"), 1, 1); - test(S("ejqcnahdrkfsmptilgbo"), 1, 9); - test(S("jsbtafedocnirgpmkhql"), 1, 18); - test(S("prqgnlbaejsmkhdctoif"), 1, 19); - test(S("qnmodrtkebhpasifgcjl"), 10, 0); - test(S("pejafmnokrqhtisbcdgl"), 10, 1); - test(S("cpebqsfmnjdolhkratgi"), 10, 5); - test(S("odnqkgijrhabfmcestlp"), 10, 9); - test(S("lmofqdhpkibagnrcjste"), 10, 10); - test(S("lgjqketopbfahrmnsicd"), 19, 0); - test(S("ktsrmnqagdecfhijpobl"), 19, 1); - test(S("lsaijeqhtrbgcdmpfkno"), 20, 0); - test(S("dplqartnfgejichmoskb"), 21, 0); - } + test_string, min_allocator>>(); #endif return true; diff --git a/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr_rvalue.pass.cpp b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr_rvalue.pass.cpp new file mode 100644 index 0000000000000..13019ae351077 --- /dev/null +++ b/libcxx/test/std/strings/basic.string/string.ops/string_substr/substr_rvalue.pass.cpp @@ -0,0 +1,103 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// constexpr basic_string substr(size_type pos = 0, size_type n = npos) &&; + +#include +#include + +#include "constexpr_char_traits.h" +#include "make_string.h" +#include "min_allocator.h" +#include "test_allocator.h" + +#define STR(string) MAKE_CSTRING(typename S::value_type, string) + +constexpr struct should_throw_exception_t { +} should_throw_exception; + +template +constexpr void test(S orig, size_t pos, ptrdiff_t n, S expected) { + S str = std::move(orig).substr(pos, n); + LIBCPP_ASSERT(orig.__invariants()); + LIBCPP_ASSERT(str.__invariants()); + assert(str == expected); +} + +template +constexpr void test(S orig, size_t pos, ptrdiff_t n, should_throw_exception_t) { +#ifndef TEST_HAS_NO_EXCEPTIONS + if (!std::is_constant_evaluated()) { + try { + S str = std::move(orig).substr(pos, n); + assert(false); + } catch (const std::out_of_range&) { + } + } +#else + (void)orig; + (void)pos; + (void)n; +#endif +} + +template +constexpr void test_string() { + test(STR(""), 0, 0, STR("")); + test(STR(""), 0, 1, STR("")); + test(STR(""), 1, 0, should_throw_exception); + test(STR(""), 1, 1, should_throw_exception); + test(STR("short string"), 0, 1, STR("s")); + test(STR("short string"), 5, 5, STR(" stri")); + test(STR("short string"), 12, 5, STR("")); + test(STR("short string"), 13, 5, should_throw_exception); + test(STR("long long string so no SSO"), 0, 0, STR("")); + test(STR("long long string so no SSO"), 0, 10, STR("long long ")); + test(STR("long long string so no SSO"), 10, 10, STR("string so ")); + test(STR("long long string so no SSO"), 20, 10, STR("no SSO")); + test(STR("long long string so no SSO"), 26, 10, STR("")); + test(STR("long long string so no SSO"), 27, 0, should_throw_exception); +} + +template +constexpr void test_allocators() { + test_string>>(); 
+ test_string>>(); + test_string>>(); +} + +template +constexpr void test_char_traits() { + test_allocators>(); + test_allocators>(); +} + +constexpr bool test() { + test_char_traits(); + test_char_traits(); + test_char_traits(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test_char_traits(); +#endif +#ifndef TEST_HAS_NO_CHAR8_T + test_char_traits(); +#endif + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/support/count_new.h b/libcxx/test/support/count_new.h index aadebe444708f..645062a01446d 100644 --- a/libcxx/test/support/count_new.h +++ b/libcxx/test/support/count_new.h @@ -472,6 +472,40 @@ struct DisableAllocationGuard { DisableAllocationGuard& operator=(DisableAllocationGuard const&); }; +#if TEST_STD_VER >= 20 + +struct ConstexprDisableAllocationGuard { + TEST_CONSTEXPR_CXX14 explicit ConstexprDisableAllocationGuard(bool disable = true) : m_disabled(disable) + { + if (!TEST_IS_CONSTANT_EVALUATED) { + // Don't re-disable if already disabled. 
+ if (globalMemCounter.disable_allocations == true) m_disabled = false; + if (m_disabled) globalMemCounter.disableAllocations(); + } else { + m_disabled = false; + } + } + + TEST_CONSTEXPR_CXX14 void release() { + if (!TEST_IS_CONSTANT_EVALUATED) { + if (m_disabled) globalMemCounter.enableAllocations(); + m_disabled = false; + } + } + + TEST_CONSTEXPR_CXX20 ~ConstexprDisableAllocationGuard() { + release(); + } + +private: + bool m_disabled; + + ConstexprDisableAllocationGuard(ConstexprDisableAllocationGuard const&); + ConstexprDisableAllocationGuard& operator=(ConstexprDisableAllocationGuard const&); +}; + +#endif + struct RequireAllocationGuard { explicit RequireAllocationGuard(std::size_t RequireAtLeast = 1) : m_req_alloc(RequireAtLeast), diff --git a/libcxx/test/support/make_string.h b/libcxx/test/support/make_string.h index 00c2a48e3d004..728b6540abe07 100644 --- a/libcxx/test/support/make_string.h +++ b/libcxx/test/support/make_string.h @@ -89,7 +89,7 @@ struct MultiStringType { // This helper is used in unit tests to make them generic. The input should be // valid ASCII which means the input is also valid UTF-8. #define MAKE_CSTRING(CharT, Str) \ - MKSTR(Str).as_ptr((const CharT*)0) + MKSTR(Str).as_ptr(static_cast(nullptr)) // Like MAKE_CSTRING but makes a basic_string. Embedded nulls are OK. 
#define MAKE_STRING(CharT, Str) \ diff --git a/libcxxabi/cmake/config-ix.cmake b/libcxxabi/cmake/config-ix.cmake index ff9a1bf349e52..f4ee8946c1fea 100644 --- a/libcxxabi/cmake/config-ix.cmake +++ b/libcxxabi/cmake/config-ix.cmake @@ -81,7 +81,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() endif() diff --git a/libcxxabi/test/catch_reference_nullptr.pass.cpp b/libcxxabi/test/catch_reference_nullptr.pass.cpp index 708d5d798a1d1..e9c3ba31b06b7 100644 --- a/libcxxabi/test/catch_reference_nullptr.pass.cpp +++ b/libcxxabi/test/catch_reference_nullptr.pass.cpp @@ -6,11 +6,13 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: c++03, +// UNSUPPORTED: c++03 // UNSUPPORTED: no-exceptions #include +#include #include +#include struct A {}; @@ -27,13 +29,13 @@ static void catch_nullptr_test() { int main(int, char**) { - using nullptr_t = decltype(nullptr); + static_assert(std::is_same::value, ""); // A reference to nullptr_t can catch nullptr. - catch_nullptr_test(); - catch_nullptr_test(); - catch_nullptr_test(); - catch_nullptr_test(); + catch_nullptr_test(); + catch_nullptr_test(); + catch_nullptr_test(); + catch_nullptr_test(); // No other reference type can. 
#if 0 diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake index 96cb8afcb485c..d311477f02c69 100644 --- a/libunwind/cmake/config-ix.cmake +++ b/libunwind/cmake/config-ix.cmake @@ -85,7 +85,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror=unknown-pragmas") check_c_source_compiles(" #pragma comment(lib, \"c\") -int main() { return 0; } +int main(void) { return 0; } " C_SUPPORTS_COMMENT_LIB_PRAGMA) cmake_pop_check_state() endif() diff --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt index d289bd5910348..55aec26854c8d 100644 --- a/lld/COFF/CMakeLists.txt +++ b/lld/COFF/CMakeLists.txt @@ -44,6 +44,7 @@ add_lld_library(lldCOFF LINK_LIBS lldCommon ${LLVM_PTHREAD_LIB} + ${LLVM_ATOMIC_LIB} DEPENDS COFFOptionsTableGen diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 1ae7da1f5f7f0..9c23ed3952235 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -1,9 +1,3 @@ -set(LLD_SYSTEM_LIBS ${LLVM_PTHREAD_LIB}) - -if(NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB) - list(APPEND LLD_SYSTEM_LIBS atomic) -endif() - find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc) find_first_existing_vc_file("${LLD_SOURCE_DIR}" lld_vc) @@ -54,7 +48,8 @@ add_lld_library(lldCommon Target LINK_LIBS - ${LLD_SYSTEM_LIBS} + ${LLVM_PTHREAD_LIB} + ${LLVM_ATOMIC_LIB} DEPENDS intrinsics_gen diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp index 41a29a7328c28..2d10bedfcb2eb 100644 --- a/lld/ELF/AArch64ErrataFix.cpp +++ b/lld/ELF/AArch64ErrataFix.cpp @@ -55,7 +55,7 @@ static bool isADRP(uint32_t instr) { return (instr & 0x9f000000) == 0x90000000; } -// Load and store bit patterns from ARMv8-A ARM ARM. +// Load and store bit patterns from ARMv8-A. // Instructions appear in order of appearance starting from table in // C4.1.3 Loads and Stores. 
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index b3dbd5fa65d84..79ff9614fdb9f 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1249,7 +1249,7 @@ static void readConfigs(opt::InputArgList &args) { config->trace = args.hasArg(OPT_trace); config->undefined = args::getStrings(args, OPT_undefined); config->undefinedVersion = - args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, true); + args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, false); config->unique = args.hasArg(OPT_unique); config->useAndroidRelrTags = args.hasFlag( OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 2e9e057a09615..a8fc63e6a9196 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -443,7 +443,7 @@ defm unresolved_symbols: Eq<"unresolved-symbols", "Determine how to handle unresolved symbols">; defm undefined_version: B<"undefined-version", - "Allow unused version in version script (default)", + "Allow unused version in version script (disabled by default)", "Report version scripts that refer undefined symbols">; defm rsp_quoting: EEq<"rsp-quoting", "Quoting style for response files">, diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 12cfc97bf5848..316d89411cdad 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2835,6 +2835,8 @@ createSymbols( // Returns a newly-created .gdb_index section. template GdbIndexSection *GdbIndexSection::create() { + llvm::TimeTraceScope timeScope("Create gdb index"); + // Collect InputFiles with .debug_info. See the comment in // LLDDwarfObj::LLDDwarfObj. 
If we do lightweight parsing in the future, // note that isec->data() may uncompress the full content, which should be @@ -3651,7 +3653,7 @@ size_t PPC64LongBranchTargetSection::getSize() const { void PPC64LongBranchTargetSection::writeTo(uint8_t *buf) { // If linking non-pic we have the final addresses of the targets and they get // written to the table directly. For pic the dynamic linker will allocate - // the section and fill it it. + // the section and fill it. if (config->isPic) return; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 918490b972efb..2d99f6d6f7d5e 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1794,10 +1794,9 @@ static void removeUnusedSyntheticSections() { // all regular ones. Reverse iterate to find the first synthetic section // after a non-synthetic one which will be our starting point. auto start = - std::find_if( - ctx.inputSections.rbegin(), ctx.inputSections.rend(), - [](InputSectionBase *s) { return !isa(s); }) - .base(); + llvm::find_if(llvm::reverse(ctx.inputSections), [](InputSectionBase *s) { + return !isa(s); + }).base(); // Remove unused synthetic sections from ctx.inputSections; DenseSet unused; diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp index be541d29f19e8..3b28dfd306c38 100644 --- a/lld/MachO/OutputSegment.cpp +++ b/lld/MachO/OutputSegment.cpp @@ -125,6 +125,7 @@ static int sectionOrder(OutputSection *osec) { } } else if (segname == segment_names::linkEdit) { return StringSwitch(osec->name) + .Case(section_names::chainFixups, -11) .Case(section_names::rebase, -10) .Case(section_names::binding, -9) .Case(section_names::weakBinding, -8) diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 141f34103da15..0157e40fa6612 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -33,6 +33,9 @@ ELF Improvements (`D133548 `_) * ``--no-warnings``/``-w`` is now available to suppress warnings. 
(`D136569 `_) +* ``--no-undefined-version`` is now the default; symbols named in version + scripts that have no matching symbol in the output will be reported. Use + ``--undefined-version`` to revert to the old behavior. Breaking changes ---------------- diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 2b530af39ad64..edeb7c4bfe37c 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -352,8 +352,8 @@ Do not set the text data sections to be writable, page align sections. Disable target-specific relaxations. For x86-64 this disables R_X86_64_GOTPCRELX and R_X86_64_REX_GOTPCRELX GOT optimization. .It Fl -no-rosegment Do not put read-only non-executable sections in their own segment. -.It Fl -no-undefined-version -Report version scripts that refer undefined symbols. +.It Fl -undefined-version +Do not report version scripts that refer to undefined symbols. .It Fl -no-undefined Report unresolved symbols even if the linker is creating a shared library. .It Fl -no-warn-symbol-ordering diff --git a/lld/test/ELF/basic.s b/lld/test/ELF/basic.s index 6b17bf75ee7bb..587fd1641500a 100644 --- a/lld/test/ELF/basic.s +++ b/lld/test/ELF/basic.s @@ -6,7 +6,7 @@ # RUN: | FileCheck %s # RUN: ld.lld %t -o /dev/null -# exits with return code 42 on linux +## exits with return code 42 on linux .globl _start _start: mov $60, %rax @@ -201,23 +201,29 @@ _start: # CHECK-NEXT: } # CHECK-NEXT: ] -# Test for the response file (POSIX quoting style) +## Test for the response file (POSIX quoting style) # RUN: echo " -o %t2" > %t.responsefile # RUN: ld.lld %t --rsp-quoting=posix @%t.responsefile # RUN: llvm-readobj --file-headers --sections -l --symbols %t2 \ # RUN: | FileCheck %s -# Test for the response file (Windows quoting style) +## Test for the response file (Windows quoting style) # RUN: echo " c:\blah\foo" > %t.responsefile # RUN: not ld.lld --rsp-quoting=windows %t @%t.responsefile 2>&1 | FileCheck \ # RUN: %s --check-prefix=WINRSP # WINRSP: cannot open c:\blah\foo -# Test for 
the response file (invalid quoting style) +## Test for the response file (invalid quoting style) # RUN: not ld.lld --rsp-quoting=patatino %t 2>&1 | FileCheck %s \ # RUN: --check-prefix=INVRSP # INVRSP: invalid response file quoting: patatino +## Test erroring on a recursive response file, but only once. +# RUN: echo @%t.responsefile > %t.responsefile +# RUN: not ld.lld %t @%t.responsefile 2>&1 | FileCheck %s --check-prefix=RECRSP +# RECRSP: recursive expansion of: '{{.*}}.responsefile' +# RECRSP-NOT: recursive expansion of + # RUN: not ld.lld %t.foo -o /dev/null 2>&1 | \ # RUN: FileCheck -DMSG=%errc_ENOENT --check-prefix=MISSING %s # MISSING: cannot open {{.*}}.foo: [[MSG]] diff --git a/lld/test/ELF/verdef-defaultver.s b/lld/test/ELF/verdef-defaultver.s index 7becdcf96422b..661f6c4e7da42 100644 --- a/lld/test/ELF/verdef-defaultver.s +++ b/lld/test/ELF/verdef-defaultver.s @@ -4,7 +4,7 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/verdef-defaultver.s -o %t1 # RUN: echo "V1 { global: a; b; local: *; };" > %t.script # RUN: echo "V2 { global: b; c; } V1;" >> %t.script -# RUN: ld.lld --hash-style=sysv -shared -soname shared %t1 --version-script %t.script -o %t.so +# RUN: ld.lld --hash-style=sysv -shared -soname shared %t1 --version-script %t.script --undefined-version -o %t.so # RUN: llvm-readobj -V --dyn-syms %t.so | FileCheck --check-prefix=DSO %s # DSO: DynamicSymbols [ @@ -195,9 +195,9 @@ # EXE-NEXT: ] # RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o -# RUN: ld.lld -shared --version-script=%t.script --fatal-warnings %t.so b.o -o b.so +# RUN: ld.lld -shared --version-script=%t.script --fatal-warnings --undefined-version %t.so b.o -o b.so # RUN: llvm-readelf --dyn-syms b.so | FileCheck %s --check-prefix=PREEMPT -# RUN: ld.lld -shared --version-script=%t.script --fatal-warnings b.o %t.so -o b.so +# RUN: ld.lld -shared --version-script=%t.script --fatal-warnings --undefined-version b.o %t.so -o b.so # RUN: llvm-readelf --dyn-syms b.so | 
FileCheck %s --check-prefix=PREEMPT # PREEMPT-DAG: a@@V1 diff --git a/lld/test/ELF/verdef-dependency.s b/lld/test/ELF/verdef-dependency.s index d716436202535..89ebc3043ad44 100644 --- a/lld/test/ELF/verdef-dependency.s +++ b/lld/test/ELF/verdef-dependency.s @@ -3,7 +3,7 @@ # RUN: echo "LIBSAMPLE_1.0 { global: a; local: *; };" > %t.script # RUN: echo "LIBSAMPLE_2.0 { global: b; local: *; } LIBSAMPLE_1.0;" >> %t.script # RUN: echo "LIBSAMPLE_3.0 { global: c; } LIBSAMPLE_2.0;" >> %t.script -# RUN: ld.lld --version-script %t.script -shared -soname shared %t.o -o %t.so +# RUN: ld.lld --version-script %t.script --undefined-version -shared -soname shared %t.o -o %t.so # RUN: llvm-readobj -V --dyn-syms %t.so | FileCheck --check-prefix=DSO %s # DSO: VersionDefinitions [ diff --git a/lld/test/ELF/verneed.s b/lld/test/ELF/verneed.s index 6a90cc48e68fb..734387a62785f 100644 --- a/lld/test/ELF/verneed.s +++ b/lld/test/ELF/verneed.s @@ -1,9 +1,9 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %S/Inputs/verneed1.s -o %t1.o # RUN: echo "v1 {}; v2 {}; v3 { global: f1; local: *; };" > %t.script -# RUN: ld.lld -shared %t1.o --version-script %t.script -o %t1.so -soname verneed1.so.0 +# RUN: ld.lld -shared %t1.o --version-script %t.script --undefined-version -o %t1.so -soname verneed1.so.0 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %S/Inputs/verneed2.s -o %t2.o -# RUN: ld.lld -shared %t2.o --version-script %t.script -o %t2.so -soname verneed2.so.0 +# RUN: ld.lld -shared %t2.o --version-script %t.script --undefined-version -o %t2.so -soname verneed2.so.0 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o # RUN: ld.lld --hash-style=sysv %t.o %t1.so %t2.so -o %t diff --git a/lld/test/ELF/version-script-extern-undefined.s b/lld/test/ELF/version-script-extern-undefined.s index 58b4d2e0fe53f..38114229e0ce3 100644 --- a/lld/test/ELF/version-script-extern-undefined.s +++ b/lld/test/ELF/version-script-extern-undefined.s @@ -2,7 +2,7 @@ # RUN: 
llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o # RUN: echo "FOO { global: extern \"C++\" { \"abb(int)\"; }; };" > %t.script -# RUN: ld.lld --version-script %t.script -shared %t.o -o %t.so +# RUN: ld.lld --version-script %t.script --undefined-version -shared %t.o -o %t.so # RUN: llvm-readobj -V %t.so | FileCheck %s # CHECK: VersionSymbols [ diff --git a/lld/test/ELF/version-script-local-preemptible.s b/lld/test/ELF/version-script-local-preemptible.s index ffb16648dc800..033c9459fb56c 100644 --- a/lld/test/ELF/version-script-local-preemptible.s +++ b/lld/test/ELF/version-script-local-preemptible.s @@ -10,7 +10,7 @@ # RUN: echo "{ global: main; local: *; };" > %t.script # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o -# RUN: ld.lld %t.o %t.so -o %t -version-script %t.script +# RUN: ld.lld %t.o %t.so -o %t -version-script %t.script --undefined-version # RUN: llvm-readelf -r --symbols %t | FileCheck %s # CHECK: Relocation section '.rela.plt' at offset {{.*}} contains 1 entries: diff --git a/lld/test/ELF/version-script-noundef.s b/lld/test/ELF/version-script-noundef.s index 18916b66f064e..b99fb1779f6eb 100644 --- a/lld/test/ELF/version-script-noundef.s +++ b/lld/test/ELF/version-script-noundef.s @@ -2,7 +2,8 @@ # RUN: echo "VERSION_1.0 { global: bar; };" > %t.script # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o -# RUN: ld.lld --version-script %t.script -shared %t.o -o /dev/null --fatal-warnings +# RUN: not ld.lld --version-script %t.script -shared %t.o -o /dev/null \ +# RUN: --fatal-warnings 2>&1 | FileCheck -check-prefix=ERR1 %s # RUN: ld.lld --version-script %t.script -shared --undefined-version %t.o -o %t.so # RUN: not ld.lld --version-script %t.script -shared --no-undefined-version \ # RUN: %t.o -o %t.so 2>&1 | FileCheck -check-prefix=ERR1 %s diff --git a/lld/test/ELF/version-script-reassign.s b/lld/test/ELF/version-script-reassign.s index 2ed5b15faceda..371390019a4dd 100644 --- 
a/lld/test/ELF/version-script-reassign.s +++ b/lld/test/ELF/version-script-reassign.s @@ -24,7 +24,7 @@ # RUN: llvm-readelf --dyn-syms %t.so | FileCheck --check-prefix=V1-SYM %s # RUN: ld.lld -shared %t.o --version-script %t1.ver --version-script %t2w.ver \ -# RUN: -o %t.so --fatal-warnings +# RUN: -o %t.so --fatal-warnings --undefined-version # RUN: llvm-readelf --dyn-syms %t.so | FileCheck --check-prefix=V1-SYM %s # LOCAL: warning: attempt to reassign symbol 'foo' of VER_NDX_LOCAL to version 'V1' diff --git a/lld/test/MachO/linkedit-contiguity.s b/lld/test/MachO/linkedit-contiguity.s index 9cf3b500b922b..e85b312d8add2 100644 --- a/lld/test/MachO/linkedit-contiguity.s +++ b/lld/test/MachO/linkedit-contiguity.s @@ -2,7 +2,7 @@ # RUN: rm -rf %t; split-file %s %t ## codesign requires that each section in __LINKEDIT ends where the next one -## starts. This test enforces that invariant. +## starts and that they follow a certain order. This test enforces that invariant. ## It also checks that the last section in __LINKEDIT covers the last byte of ## the segment. 
@@ -10,9 +10,12 @@ # RUN: %lld %t/foo.o -dylib -o %t/libfoo.dylib # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o + # RUN: %lld -lSystem -adhoc_codesign -o %t/test %t/libfoo.dylib %t/test.o +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck --check-prefixes=CHECK,OPCODE %s -# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s +# RUN: %lld -lSystem -adhoc_codesign -fixup_chains -o %t/chained_test %t/libfoo.dylib %t/test.o +# RUN: llvm-objdump --macho --all-headers %t/chained_test | FileCheck --check-prefixes=CHECK,CHAINED %s # CHECK: segname __LINKEDIT # CHECK-NEXT: vmaddr @@ -20,18 +23,28 @@ # CHECK-NEXT: fileoff [[#LINKEDIT_OFF:]] # CHECK-NEXT: filesize [[#LINKEDIT_SIZE:]] -# CHECK: cmd LC_DYLD_INFO_ONLY -# CHECK-NEXT: cmdsize 48 -# CHECK-NEXT: rebase_off [[#REBASE_OFF:]] -# CHECK-NEXT: rebase_size [[#REBASE_SIZE:]] -# CHECK-NEXT: bind_off [[#BIND_OFF: REBASE_OFF + REBASE_SIZE]] -# CHECK-NEXT: bind_size [[#BIND_SIZE:]] -# CHECK-NEXT: weak_bind_off [[#WEAK_OFF: BIND_OFF + BIND_SIZE]] -# CHECK-NEXT: weak_bind_size [[#WEAK_SIZE:]] -# CHECK-NEXT: lazy_bind_off [[#LAZY_OFF: WEAK_OFF + WEAK_SIZE]] -# CHECK-NEXT: lazy_bind_size [[#LAZY_SIZE:]] -# CHECK-NEXT: export_off [[#EXPORT_OFF: LAZY_OFF + LAZY_SIZE]] -# CHECK-NEXT: export_size [[#EXPORT_SIZE:]] +# OPCODE: cmd LC_DYLD_INFO_ONLY +# OPCODE-NEXT: cmdsize 48 +# OPCODE-NEXT: rebase_off [[#REBASE_OFF:]] +# OPCODE-NEXT: rebase_size [[#REBASE_SIZE:]] +# OPCODE-NEXT: bind_off [[#BIND_OFF: REBASE_OFF + REBASE_SIZE]] +# OPCODE-NEXT: bind_size [[#BIND_SIZE:]] +# OPCODE-NEXT: weak_bind_off [[#WEAK_OFF: BIND_OFF + BIND_SIZE]] +# OPCODE-NEXT: weak_bind_size [[#WEAK_SIZE:]] +# OPCODE-NEXT: lazy_bind_off [[#LAZY_OFF: WEAK_OFF + WEAK_SIZE]] +# OPCODE-NEXT: lazy_bind_size [[#LAZY_SIZE:]] +# OPCODE-NEXT: export_off [[#EXPORT_OFF: LAZY_OFF + LAZY_SIZE]] +# OPCODE-NEXT: export_size [[#EXPORT_SIZE:]] + +# CHAINED: cmd LC_DYLD_CHAINED_FIXUPS +# CHAINED-NEXT: cmdsize +# CHAINED-NEXT: 
dataoff [[#FIXUPS_OFF: LINKEDIT_OFF]] +# CHAINED-NEXT: datasize [[#FIXUPS_SIZE:]] + +# CHAINED: cmd LC_DYLD_EXPORTS_TRIE +# CHAINED-NEXT: cmdsize +# CHAINED-NEXT: dataoff [[#EXPORT_OFF: FIXUPS_OFF + FIXUPS_SIZE]] +# CHAINED-NEXT: datasize [[#EXPORT_SIZE:]] # CHECK: cmd LC_FUNCTION_STARTS # CHECK-NEXT: cmdsize diff --git a/lld/test/wasm/function-imports-first.ll b/lld/test/wasm/function-imports-first.ll index 7552fb428b0ba..b4e984092f82d 100644 --- a/lld/test/wasm/function-imports-first.ll +++ b/lld/test/wasm/function-imports-first.ll @@ -1,5 +1,5 @@ ; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ret32.s -o %t.ret32.o -; RUN: llc -filetype=obj %s -o %t.o +; RUN: llc -mcpu=mvp -filetype=obj %s -o %t.o ; RUN: wasm-ld -o %t.wasm %t.o %t.ret32.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/gc-sections.ll b/lld/test/wasm/gc-sections.ll index de8298697bf12..762933d0ef44b 100644 --- a/lld/test/wasm/gc-sections.ll +++ b/lld/test/wasm/gc-sections.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o %t.o +; RUN: llc -mcpu=mvp -filetype=obj %s -o %t.o ; RUN: yaml2obj %S/Inputs/globals.yaml -o %t_globals.o ; RUN: wasm-ld -print-gc-sections -o %t1.wasm %t.o %t_globals.o | \ ; RUN: FileCheck %s -check-prefix=PRINT-GC diff --git a/lld/test/wasm/init-fini.ll b/lld/test/wasm/init-fini.ll index 5631d58d68e5c..15154f36d3d3b 100644 --- a/lld/test/wasm/init-fini.ll +++ b/lld/test/wasm/init-fini.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj -o %t.o %s -; RUN: llc -filetype=obj %S/Inputs/global-ctor-dtor.ll -o %t.global-ctor-dtor.o +; RUN: llc -mcpu=mvp -filetype=obj -o %t.o %s +; RUN: llc -mcpu=mvp -filetype=obj %S/Inputs/global-ctor-dtor.ll -o %t.global-ctor-dtor.o target triple = "wasm32-unknown-unknown" diff --git a/lld/test/wasm/local-symbols.ll b/lld/test/wasm/local-symbols.ll index f504603045858..93ed3c9d4eee7 100644 --- a/lld/test/wasm/local-symbols.ll +++ b/lld/test/wasm/local-symbols.ll @@ -1,5 +1,5 @@ ; Test that internal symbols can 
still be GC'd when with --export-dynamic. -; RUN: llc -filetype=obj %s -o %t.o +; RUN: llc -mcpu=mvp -filetype=obj %s -o %t.o ; RUN: wasm-ld --export-dynamic -o %t.wasm %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/locals-duplicate.test b/lld/test/wasm/locals-duplicate.test index 7de8ef15b1840..5c3135a424e69 100644 --- a/lld/test/wasm/locals-duplicate.test +++ b/lld/test/wasm/locals-duplicate.test @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj %p/Inputs/locals-duplicate1.ll -o %t1.o -; RUN: llc -filetype=obj %p/Inputs/locals-duplicate2.ll -o %t2.o +; RUN: llc -mcpu=mvp -filetype=obj %p/Inputs/locals-duplicate1.ll -o %t1.o +; RUN: llc -mcpu=mvp -filetype=obj %p/Inputs/locals-duplicate2.ll -o %t2.o ; RUN: wasm-ld --export-dynamic --no-entry -o %t.wasm %t1.o %t2.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/reproduce.ll b/lld/test/wasm/reproduce.ll deleted file mode 100644 index b00727e3b3e26..0000000000000 --- a/lld/test/wasm/reproduce.ll +++ /dev/null @@ -1,27 +0,0 @@ -; REQUIRES: shell -; RUN: rm -rf %t.dir -; RUN: mkdir -p %t.dir -; RUN: llc -filetype=obj %s -o %t.dir/foo.o -; RUN: wasm-ld --reproduce=%t.dir/repro.tar -o %t.dir/out.wasm %t.dir/foo.o - -; RUN: cd %t.dir -; RUN: tar tf repro.tar | FileCheck --check-prefix=TAR %s - -; TAR: repro/response.txt -; TAR: repro/version.txt -; TAR: repro/{{.*}}/foo.o - -; RUN: tar xf repro.tar -; RUN: FileCheck --check-prefix=RSP %s < repro/response.txt - -; RSP: -o {{.*}}out.wasm -; RSP: {{.*}}/foo.o - -; RUN: FileCheck %s --check-prefix=VERSION < repro/version.txt -; VERSION: LLD - -target triple = "wasm32-unknown-unknown" - -define void @_start() { - ret void -} diff --git a/lld/test/wasm/reproduce.s b/lld/test/wasm/reproduce.s new file mode 100644 index 0000000000000..a89843fe219e3 --- /dev/null +++ b/lld/test/wasm/reproduce.s @@ -0,0 +1,32 @@ +# REQUIRES: shell +# RUN: rm -rf %t.dir +# RUN: mkdir -p %t.dir +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o 
%t.dir/foo.o %s +# RUN: wasm-ld --reproduce=%t.dir/repro.tar -o %t.dir/out.wasm %t.dir/foo.o +# RUN: env LLD_REPRODUCE=%t.dir/repro2.tar wasm-ld -o %t.dir/out.wasm %t.dir/foo.o + +# RUN: cd %t.dir +# RUN: tar tf repro.tar | FileCheck --check-prefix=TAR %s +# RUN: tar tf repro2.tar | FileCheck --check-prefix=TAR2 %s + +# TAR: repro/response.txt +# TAR: repro/version.txt +# TAR: repro/{{.*}}/foo.o + +# TAR2: repro2/response.txt +# TAR2: repro2/version.txt +# TAR2: repro2/{{.*}}/foo.o + +# RUN: tar xf repro.tar +# RUN: FileCheck --check-prefix=RSP %s < repro/response.txt + +# RSP: -o {{.*}}out.wasm +# RSP: {{.*}}/foo.o + +# RUN: FileCheck %s --check-prefix=VERSION < repro/version.txt +# VERSION: LLD + +.globl _start +_start: + .functype _start () -> () + end_function diff --git a/lld/test/wasm/signature-mismatch-export.ll b/lld/test/wasm/signature-mismatch-export.ll index 1d5e2a77a6c49..b77b5092a092a 100644 --- a/lld/test/wasm/signature-mismatch-export.ll +++ b/lld/test/wasm/signature-mismatch-export.ll @@ -1,5 +1,5 @@ ; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ret32.s -o %t.ret32.o -; RUN: llc -filetype=obj %s -o %t.main.o +; RUN: llc -mcpu=mvp -filetype=obj %s -o %t.main.o ; RUN: wasm-ld --export=ret32 -o %t.wasm %t.main.o %t.ret32.o ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/weak-alias-overide.ll b/lld/test/wasm/weak-alias-overide.ll index fcf2293892910..ca6f4bf4230a2 100644 --- a/lld/test/wasm/weak-alias-overide.ll +++ b/lld/test/wasm/weak-alias-overide.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj -o %t.o %s -; RUN: llc -filetype=obj %S/Inputs/weak-alias.ll -o %t2.o +; RUN: llc -mcpu=mvp -filetype=obj -o %t.o %s +; RUN: llc -mcpu=mvp -filetype=obj %S/Inputs/weak-alias.ll -o %t2.o ; RUN: wasm-ld --export-dynamic %t.o %t2.o -o %t.wasm ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/test/wasm/weak-alias.ll b/lld/test/wasm/weak-alias.ll index aa0a271396d1a..cba39acda8e9c 100644 --- a/lld/test/wasm/weak-alias.ll 
+++ b/lld/test/wasm/weak-alias.ll @@ -1,5 +1,5 @@ -; RUN: llc -filetype=obj -o %t.o %s -; RUN: llc -filetype=obj %S/Inputs/weak-alias.ll -o %t2.o +; RUN: llc -mcpu=mvp -filetype=obj -o %t.o %s +; RUN: llc -mcpu=mvp -filetype=obj %S/Inputs/weak-alias.ll -o %t2.o ; RUN: wasm-ld --export-dynamic %t.o %t2.o -o %t.wasm ; RUN: obj2yaml %t.wasm | FileCheck %s diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index b0e28d15fa29e..700c0b770e201 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -89,7 +89,9 @@ static bool isPETarget(std::vector &v) { SmallVector expandedArgs(v.data(), v.data() + v.size()); BumpPtrAllocator a; StringSaver saver(a); - cl::ExpandResponseFiles(saver, getDefaultQuotingStyle(), expandedArgs); + cl::ExpansionContext ECtx(saver.getAllocator(), getDefaultQuotingStyle()); + if (Error Err = ECtx.expandResponseFiles(expandedArgs)) + die(toString(std::move(Err))); for (auto it = expandedArgs.begin(); it + 1 != expandedArgs.end(); ++it) { if (StringRef(*it) != "-m") continue; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index db1ef5ffff778..62cd6192b01d8 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -618,6 +618,12 @@ static void checkOptions(opt::InputArgList &args) { } } +static const char *getReproduceOption(opt::InputArgList &args) { + if (auto *arg = args.getLastArg(OPT_reproduce)) + return arg->getValue(); + return getenv("LLD_REPRODUCE"); +} + // Force Sym to be entered in the output. Used for -u or equivalent. 
static Symbol *handleUndefined(StringRef name) { Symbol *sym = symtab->find(name); @@ -955,8 +961,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { } // Handle --reproduce - if (auto *arg = args.getLastArg(OPT_reproduce)) { - StringRef path = arg->getValue(); + if (const char *path = getReproduceOption(args)) { Expected> errOrWriter = TarWriter::create(path, path::stem(path)); if (errOrWriter) { diff --git a/lldb/examples/python/crashlog.py b/lldb/examples/python/crashlog.py index e80ecd91579ad..b62ebd758f22c 100755 --- a/lldb/examples/python/crashlog.py +++ b/lldb/examples/python/crashlog.py @@ -462,6 +462,12 @@ def parse(self): self.parse_images(self.data['usedImages']) self.parse_main_image(self.data) self.parse_threads(self.data['threads']) + if 'asi' in self.data: + self.crashlog.asi = self.data['asi'] + if 'asiBacktraces' in self.data: + self.parse_app_specific_backtraces(self.data['asiBacktraces']) + if 'lastExceptionBacktrace' in self.data: + self.crashlog.asb = self.data['lastExceptionBacktrace'] self.parse_errors(self.data) thread = self.crashlog.threads[self.crashlog.crashed_thread_idx] reason = self.parse_crash_reason(self.data['exception']) @@ -573,6 +579,31 @@ def parse_threads(self, json_threads): self.crashlog.threads.append(thread) idx += 1 + def parse_asi_backtrace(self, thread, bt): + for line in bt.split('\n'): + frame_match = TextCrashLogParser.frame_regex.search(line) + if not frame_match: + print("error: can't parse application specific backtrace.") + return False + + (frame_id, frame_img_name, frame_addr, + frame_ofs) = frame_match.groups() + + thread.add_ident(frame_img_name) + if frame_img_name not in self.crashlog.idents: + self.crashlog.idents.append(frame_img_name) + thread.frames.append(self.crashlog.Frame(int(frame_id), int( + frame_addr, 0), frame_ofs)) + + return True + + def parse_app_specific_backtraces(self, json_app_specific_bts): + for idx, backtrace in enumerate(json_app_specific_bts): + thread = self.crashlog.Thread(idx, 
True) + thread.queue = "Application Specific Backtrace" + if self.parse_asi_backtrace(thread, backtrace): + self.crashlog.threads.append(thread) + def parse_thread_registers(self, json_thread_state, prefix=None): registers = dict() for key, state in json_thread_state.items(): @@ -613,17 +644,17 @@ class TextCrashLogParser(CrashLogParser): frame_regex = re.compile(r'^(\d+)\s+' # id r'(.+?)\s+' # img_name r'(?:' +version+ r'\s+)?' # img_version - r'(0x[0-9a-fA-F]{7,})' # addr (7 chars or more) - r' +(.*)' # offs + r'(0x[0-9a-fA-F]{4,})' # addr (4 chars or more) + r'(?: +(.*))?' # offs ) - null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{7,} +') + null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{4,} +') image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)' # img_lo r'\s+-\s+' # - r'(0x[0-9a-fA-F]+)\s+' # img_hi r'[+]?(.+?)\s+' # img_name r'(?:(' +version+ r')\s+)?' # img_version r'(?:<([-0-9a-fA-F]+)>\s+)?' # img_uuid - r'(/.*)' # img_path + r'(\?+|/.*)' # img_path ) exception_type_regex = re.compile(r'^Exception Type:\s+(EXC_[A-Z_]+)(?:\s+\((.*)\))?') exception_codes_regex = re.compile(r'^Exception Codes:\s+(0x[0-9a-fA-F]+),\s*(0x[0-9a-fA-F]+)') @@ -1073,7 +1104,7 @@ def load_crashlog_in_scripted_process(debugger, crash_log_file, options, result) raise InteractiveCrashLogException("couldn't import crashlog scripted process module") structured_data = lldb.SBStructuredData() - structured_data.SetFromJSON(json.dumps({ "crashlog_path" : crashlog_path, + structured_data.SetFromJSON(json.dumps({ "file_path" : crashlog_path, "load_all_images": options.load_all_images })) launch_info = lldb.SBLaunchInfo(None) launch_info.SetProcessPluginName("ScriptedProcess") @@ -1102,8 +1133,8 @@ def synchronous(debugger): run_options.SetEchoCommands(True) commands_stream = lldb.SBStream() - commands_stream.Print("process status\n") - commands_stream.Print("thread backtrace\n") + commands_stream.Print("process status --verbose\n") + commands_stream.Print("thread backtrace --extended true\n") 
error = debugger.SetInputString(commands_stream.GetData()) if error.Success(): debugger.RunCommandInterpreter(True, False, run_options, 0, False, True) diff --git a/lldb/examples/python/scripted_process/crashlog_scripted_process.py b/lldb/examples/python/scripted_process/crashlog_scripted_process.py index e64b9b7822af1..7ed5cc930da72 100644 --- a/lldb/examples/python/scripted_process/crashlog_scripted_process.py +++ b/lldb/examples/python/scripted_process/crashlog_scripted_process.py @@ -18,6 +18,11 @@ def parse_crashlog(self): self.crashed_thread_idx = crash_log.crashed_thread_idx self.loaded_images = [] self.exception = crash_log.exception + self.app_specific_thread = None + if hasattr(crash_log, 'asi'): + self.metadata['asi'] = crash_log.asi + if hasattr(crash_log, 'asb'): + self.extended_thread_info = crash_log.asb def load_images(self, images): #TODO: Add to self.loaded_images and load images in lldb @@ -40,8 +45,23 @@ def load_images(self, images): for ident in thread.idents: load_images(self, crash_log.find_images_with_identifier(ident)) + if hasattr(thread, 'app_specific_backtrace') and thread.app_specific_backtrace: + # We don't want to include the Application Specific Backtrace + # Thread into the Scripted Process' Thread list. + # Instead, we will try to extract the stackframe pcs from the + # backtrace and inject that as the extended thread info. 
+ self.app_specific_thread = thread + continue + self.threads[thread.index] = CrashLogScriptedThread(self, None, thread) + + if self.app_specific_thread: + self.extended_thread_info = \ + CrashLogScriptedThread.resolve_stackframes(self.app_specific_thread, + self.addr_mask, + self.target) + def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData): super().__init__(target, args) @@ -51,7 +71,7 @@ def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData): self.crashlog_path = None - crashlog_path = args.GetValueForKey("crashlog_path") + crashlog_path = args.GetValueForKey("file_path") if crashlog_path and crashlog_path.IsValid(): if crashlog_path.GetType() == lldb.eStructuredDataTypeString: self.crashlog_path = crashlog_path.GetStringValue(4096) @@ -71,6 +91,7 @@ def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData): self.pid = super().get_process_id() self.crashed_thread_idx = 0 self.exception = None + self.extended_thread_info = None self.parse_crashlog() def get_memory_region_containing_address(self, addr: int) -> lldb.SBMemoryRegionInfo: @@ -103,6 +124,9 @@ def is_alive(self) -> bool: def get_scripted_thread_plugin(self): return CrashLogScriptedThread.__module__ + "." 
+ CrashLogScriptedThread.__name__ + def get_process_metadata(self): + return self.metadata + class CrashLogScriptedThread(ScriptedThread): def create_register_ctx(self): if not self.has_crashed: @@ -120,6 +144,19 @@ def create_register_ctx(self): return self.register_ctx + def resolve_stackframes(thread, addr_mask, target): + frames = [] + for frame in thread.frames: + frame_pc = frame.pc & addr_mask + pc = frame_pc if frame.index == 0 or frame_pc == 0 else frame_pc - 1 + sym_addr = lldb.SBAddress() + sym_addr.SetLoadAddress(pc, target) + if not sym_addr.IsValid(): + continue + frames.append({"idx": frame.index, "pc": pc}) + return frames + + def create_stackframes(self): if not (self.scripted_process.load_all_images or self.has_crashed): return None @@ -127,14 +164,9 @@ def create_stackframes(self): if not self.backing_thread or not len(self.backing_thread.frames): return None - for frame in self.backing_thread.frames: - frame_pc = frame.pc & self.scripted_process.addr_mask - pc = frame_pc if frame.index == 0 or frame_pc == 0 else frame_pc - 1 - sym_addr = lldb.SBAddress() - sym_addr.SetLoadAddress(pc, self.target) - if not sym_addr.IsValid(): - continue - self.frames.append({"idx": frame.index, "pc": pc}) + self.frames = CrashLogScriptedThread.resolve_stackframes(self.backing_thread, + self.scripted_process.addr_mask, + self.target) return self.frames @@ -144,7 +176,10 @@ def __init__(self, process, args, crashlog_thread): self.backing_thread = crashlog_thread self.idx = self.backing_thread.index self.tid = self.backing_thread.id - self.name = self.backing_thread.name + if self.backing_thread.app_specific_backtrace: + self.name = "Application Specific Backtrace - " + str(self.idx) + else: + self.name = self.backing_thread.name self.queue = self.backing_thread.queue self.has_crashed = (self.scripted_process.crashed_thread_idx == self.idx) self.create_stackframes() @@ -168,3 +203,9 @@ def get_register_context(self) -> str: self.register_ctx = 
self.create_register_ctx() return struct.pack("{}Q".format(len(self.register_ctx)), *self.register_ctx.values()) + + def get_extended_info(self): + if (self.has_crashed): + self.extended_info = self.scripted_process.extended_thread_info + return self.extended_info + diff --git a/lldb/examples/python/scripted_process/scripted_process.py b/lldb/examples/python/scripted_process/scripted_process.py index 48966f8385cb0..43eb97dbd7723 100644 --- a/lldb/examples/python/scripted_process/scripted_process.py +++ b/lldb/examples/python/scripted_process/scripted_process.py @@ -18,6 +18,7 @@ class ScriptedProcess(metaclass=ABCMeta): stack_memory_dump = None loaded_images = None threads = None + metadata = None @abstractmethod def __init__(self, target, args): @@ -41,6 +42,7 @@ def __init__(self, target, args): self.args = args self.threads = {} self.loaded_images = [] + self.metadata = {} @abstractmethod def get_memory_region_containing_address(self, addr): @@ -138,7 +140,6 @@ def get_process_id(self): """ return 0 - def launch(self): """ Simulate the scripted process launch. @@ -191,6 +192,15 @@ def get_scripted_thread_plugin(self): """ return None + def get_process_metadata(self): + """ Get some metadata for the scripted process. + + Returns: + Dict: A dictionary containing metadata for the scripted process. + None is the process as no metadata. + """ + return self.metadata + class ScriptedThread(metaclass=ABCMeta): """ @@ -226,6 +236,7 @@ def __init__(self, scripted_process, args): self.register_info = None self.register_ctx = {} self.frames = [] + self.extended_info = [] if isinstance(scripted_process, ScriptedProcess): self.target = scripted_process.target @@ -334,6 +345,15 @@ def get_register_context(self): """ pass + def get_extended_info(self): + """ Get scripted thread extended information. + + Returns: + List: A list containing the extended information for the scripted process. + None is the thread as no extended information. 
+ """ + return self.extended_info + ARM64_GPR = [ {'name': 'x0', 'bitsize': 64, 'offset': 0, 'encoding': 'uint', 'format': 'hex', 'set': 0, 'gcc': 0, 'dwarf': 0, 'generic': 'arg0', 'alt-name': 'arg0'}, {'name': 'x1', 'bitsize': 64, 'offset': 8, 'encoding': 'uint', 'format': 'hex', 'set': 0, 'gcc': 1, 'dwarf': 1, 'generic': 'arg1', 'alt-name': 'arg1'}, {'name': 'x2', 'bitsize': 64, 'offset': 16, 'encoding': 'uint', 'format': 'hex', 'set': 0, 'gcc': 2, 'dwarf': 2, 'generic': 'arg2', 'alt-name': 'arg2'}, diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h index e877a14dcda10..523e04c6e6b4c 100644 --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -29,6 +29,7 @@ #include "lldb/lldb-types.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Chrono.h" @@ -814,6 +815,8 @@ class Module : public std::enable_shared_from_this, llvm::Expected GetTypeSystemForLanguage(lldb::LanguageType language); + void ForEachTypeSystem(llvm::function_ref callback); + // Special error functions that can do printf style formatting that will // prepend the message with something appropriate for this module (like the // architecture, path and object name (if any)). This centralizes code so diff --git a/lldb/include/lldb/Core/StructuredDataImpl.h b/lldb/include/lldb/Core/StructuredDataImpl.h index e755c53aaa9f6..16dbc5263b285 100644 --- a/lldb/include/lldb/Core/StructuredDataImpl.h +++ b/lldb/include/lldb/Core/StructuredDataImpl.h @@ -80,7 +80,7 @@ class StructuredDataImpl { error.SetErrorString("No data to describe."); return error; } - m_data_sp->Dump(stream, true); + m_data_sp->GetDescription(stream); return error; } // Get the data's description. 
diff --git a/lldb/include/lldb/Interpreter/CommandInterpreter.h b/lldb/include/lldb/Interpreter/CommandInterpreter.h index 255f50099ebb9..a72800b5409ca 100644 --- a/lldb/include/lldb/Interpreter/CommandInterpreter.h +++ b/lldb/include/lldb/Interpreter/CommandInterpreter.h @@ -559,6 +559,9 @@ class CommandInterpreter : public Broadcaster, bool GetSaveSessionOnQuit() const; void SetSaveSessionOnQuit(bool enable); + bool GetOpenTranscriptInEditor() const; + void SetOpenTranscriptInEditor(bool enable); + FileSpec GetSaveSessionDirectory() const; void SetSaveSessionDirectory(llvm::StringRef path); diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h index 905623e575f71..2795c2f487dff 100644 --- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h +++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h @@ -24,7 +24,7 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj = nullptr) override { - return nullptr; + return {}; } virtual Status Launch() { return Status("ScriptedProcess did not launch"); } @@ -41,22 +41,22 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { return {}; } - virtual StructuredData::DictionarySP GetThreadsInfo() { return nullptr; } + virtual StructuredData::DictionarySP GetThreadsInfo() { return {}; } virtual StructuredData::DictionarySP GetThreadWithID(lldb::tid_t tid) { - return nullptr; + return {}; } virtual StructuredData::DictionarySP GetRegistersForThread(lldb::tid_t tid) { - return nullptr; + return {}; } virtual lldb::DataExtractorSP ReadMemoryAtAddress(lldb::addr_t address, size_t size, Status &error) { - return nullptr; + return {}; } - virtual StructuredData::ArraySP GetLoadedImages() { return nullptr; } + virtual StructuredData::ArraySP GetLoadedImages() { return {}; 
} virtual lldb::pid_t GetProcessID() { return LLDB_INVALID_PROCESS_ID; } @@ -66,10 +66,12 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { return llvm::None; } + virtual StructuredData::DictionarySP GetMetadata() { return {}; } + protected: friend class ScriptedThread; virtual lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() { - return nullptr; + return {}; } }; @@ -79,7 +81,7 @@ class ScriptedThreadInterface : virtual public ScriptedInterface { CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx, StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj = nullptr) override { - return nullptr; + return {}; } virtual lldb::tid_t GetThreadID() { return LLDB_INVALID_THREAD_ID; } @@ -90,15 +92,17 @@ class ScriptedThreadInterface : virtual public ScriptedInterface { virtual llvm::Optional GetQueue() { return llvm::None; } - virtual StructuredData::DictionarySP GetStopReason() { return nullptr; } + virtual StructuredData::DictionarySP GetStopReason() { return {}; } - virtual StructuredData::ArraySP GetStackFrames() { return nullptr; } + virtual StructuredData::ArraySP GetStackFrames() { return {}; } - virtual StructuredData::DictionarySP GetRegisterInfo() { return nullptr; } + virtual StructuredData::DictionarySP GetRegisterInfo() { return {}; } virtual llvm::Optional GetRegisterContext() { return llvm::None; } + + virtual StructuredData::ArraySP GetExtendedInfo() { return {}; } }; } // namespace lldb_private diff --git a/lldb/include/lldb/Symbol/SymbolFileOnDemand.h b/lldb/include/lldb/Symbol/SymbolFileOnDemand.h index 05708395687f2..a215c7e32b26a 100644 --- a/lldb/include/lldb/Symbol/SymbolFileOnDemand.h +++ b/lldb/include/lldb/Symbol/SymbolFileOnDemand.h @@ -117,6 +117,9 @@ class SymbolFileOnDemand : public lldb_private::SymbolFile { lldb::SymbolContextItem resolve_scope, lldb_private::SymbolContext &sc) override; + lldb_private::Status + CalculateFrameVariableError(lldb_private::StackFrame 
&frame) override; + uint32_t ResolveSymbolContext( const lldb_private::SourceLocationSpec &src_location_spec, lldb::SymbolContextItem resolve_scope, diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index fd31b130c4ffd..0da0e35a4f9ca 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" +#include "llvm/Support/JSON.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Expression/Expression.h" @@ -508,6 +509,8 @@ class TypeSystem : public PluginInterface { // meaningless type itself, instead preferring to use the dynamic type virtual bool IsMeaninglessWithoutDynamicResolution(void *type); + virtual llvm::Optional ReportStatistics(); + protected: SymbolFile *m_sym_file = nullptr; }; diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 6975eb8029de0..b9995c2a44326 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -2423,6 +2423,13 @@ void PruneThreadPlans(); return Status("Not supported"); } + /// Fetch process defined metadata. + /// + /// \return + /// A StructuredDataSP object which, if non-empty, will contain the + /// information related to the process. 
+ virtual StructuredData::DictionarySP GetMetadata() { return nullptr; } + size_t AddImageToken(lldb::addr_t image_ptr); lldb::addr_t GetImagePtrFromToken(size_t token) const; diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h index db6494ce7899e..4bf2f3a69c9b1 100644 --- a/lldb/include/lldb/Target/Statistics.h +++ b/lldb/include/lldb/Target/Statistics.h @@ -12,6 +12,7 @@ #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Stream.h" #include "lldb/lldb-forward.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/JSON.h" #include #include @@ -107,6 +108,7 @@ struct ModuleStats { // identifiers of these modules in the global module list. This allows us to // track down all of the stats that contribute to this module. std::vector symfile_modules; + llvm::StringMap type_system_stats; double symtab_parse_time = 0.0; double symtab_index_time = 0.0; double debug_parse_time = 0.0; diff --git a/lldb/include/lldb/Utility/StructuredData.h b/lldb/include/lldb/Utility/StructuredData.h index 9f6300f4f115b..5420c0dcf8d5a 100644 --- a/lldb/include/lldb/Utility/StructuredData.h +++ b/lldb/include/lldb/Utility/StructuredData.h @@ -158,6 +158,12 @@ class StructuredData { Serialize(jso); } + virtual void GetDescription(lldb_private::Stream &s) const { + s.IndentMore(); + Dump(s, false); + s.IndentLess(); + } + private: lldb::StructuredDataType m_type; }; @@ -277,6 +283,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: typedef std::vector collection; collection m_items; @@ -295,6 +303,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: uint64_t m_value; }; @@ -312,6 +322,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + 
protected: double m_value; }; @@ -329,6 +341,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: bool m_value; }; @@ -345,6 +359,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: std::string m_value; }; @@ -524,6 +540,8 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + protected: typedef std::map collection; collection m_dict; @@ -538,6 +556,8 @@ class StructuredData { bool IsValid() const override { return false; } void Serialize(llvm::json::OStream &s) const override; + + void GetDescription(lldb_private::Stream &s) const override; }; class Generic : public Object { @@ -553,12 +573,15 @@ class StructuredData { void Serialize(llvm::json::OStream &s) const override; + void GetDescription(lldb_private::Stream &s) const override; + private: void *m_object; }; static ObjectSP ParseJSON(const std::string &json_text); static ObjectSP ParseJSONFromFile(const FileSpec &file, Status &error); + static bool IsRecordType(const ObjectSP object_sp); }; } // namespace lldb_private diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 3e3db099cd4a6..dd47f6845ef2f 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -80,7 +80,9 @@ def _match_decorator_property(expected, actual): if isinstance(expected, no_match): return not _match_decorator_property(expected.item, actual) - if isinstance(expected, (re.Pattern, str)): + # Python 3.6 doesn't declare a `re.Pattern` type, get the dynamic type. 
+ pattern_type = type(re.compile('')) + if isinstance(expected, (pattern_type, str)): return re.search(expected, actual) is not None if hasattr(expected, "__iter__"): diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index 2d054f971cd02..63bad9d0241de 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -282,11 +282,14 @@ def check_value(self, test_base, val, error_msg=None): test_base.assertSuccess(val.GetError()) + # Python 3.6 doesn't declare a `re.Pattern` type, get the dynamic type. + pattern_type = type(re.compile('')) + if self.expect_name: test_base.assertEqual(self.expect_name, val.GetName(), this_error_msg) if self.expect_value: - if isinstance(self.expect_value, re.Pattern): + if isinstance(self.expect_value, pattern_type): test_base.assertRegex(val.GetValue(), self.expect_value, this_error_msg) else: @@ -296,7 +299,7 @@ def check_value(self, test_base, val, error_msg=None): test_base.assertEqual(self.expect_type, val.GetDisplayTypeName(), this_error_msg) if self.expect_summary: - if isinstance(self.expect_summary, re.Pattern): + if isinstance(self.expect_summary, pattern_type): test_base.assertRegex(val.GetSummary(), self.expect_summary, this_error_msg) else: diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp index 28a99ea3d94a5..92544c564e532 100644 --- a/lldb/source/Commands/CommandObjectProcess.cpp +++ b/lldb/source/Commands/CommandObjectProcess.cpp @@ -1537,8 +1537,9 @@ class CommandObjectProcessStatus : public CommandObjectParsed { StructuredData::DictionarySP crash_info_sp = *expected_crash_info; if (crash_info_sp) { + strm.EOL(); strm.PutCString("Extended Crash Information:\n"); - crash_info_sp->Dump(strm); + crash_info_sp->GetDescription(strm); } } diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp index 
bfe85043f3703..5e817635bbe6b 100644 --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -189,6 +189,7 @@ class CommandObjectThreadBacktrace : public CommandObjectIterateOverThreads { if (ext_thread_sp && ext_thread_sp->IsValid()) { const uint32_t num_frames_with_source = 0; const bool stop_format = false; + strm.PutChar('\n'); if (ext_thread_sp->GetStatus(strm, m_options.m_start, m_options.m_count, num_frames_with_source, stop_format)) { diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index d5b4621880dcd..20bd02f101fcc 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -369,6 +369,11 @@ Module::GetTypeSystemForLanguage(LanguageType language) { return m_type_system_map.GetTypeSystemForLanguage(language, this, true); } +void Module::ForEachTypeSystem( + llvm::function_ref callback) { + m_type_system_map.ForEach(callback); +} + void Module::ParseAllDebugSymbols() { std::lock_guard guard(m_mutex); size_t num_comp_units = GetNumCompileUnits(); diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index 1ccda944cd013..3f302e53c00e1 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -847,10 +847,12 @@ bool DWARFExpression::Evaluate( Process *process = nullptr; StackFrame *frame = nullptr; + Target *target = nullptr; if (exe_ctx) { process = exe_ctx->GetProcessPtr(); frame = exe_ctx->GetFramePtr(); + target = exe_ctx->GetTargetPtr(); } if (reg_ctx == nullptr && frame) reg_ctx = frame->GetRegisterContext().get(); @@ -906,12 +908,19 @@ bool DWARFExpression::Evaluate( // address and whose size is the size of an address on the target machine. case DW_OP_addr: stack.push_back(Scalar(opcodes.GetAddress(&offset))); - stack.back().SetValueType(Value::ValueType::FileAddress); - // Convert the file address to a load address, so subsequent - // DWARF operators can operate on it. 
- if (frame) - stack.back().ConvertToLoadAddress(module_sp.get(), - frame->CalculateTarget().get()); + if (target && + target->GetArchitecture().GetCore() == ArchSpec::eCore_wasm32) { + // wasm file sections aren't mapped into memory, therefore addresses can + // never point into a file section and are always LoadAddresses. + stack.back().SetValueType(Value::ValueType::LoadAddress); + } else { + stack.back().SetValueType(Value::ValueType::FileAddress); + // Convert the file address to a load address, so subsequent + // DWARF operators can operate on it. + if (frame) + stack.back().ConvertToLoadAddress(module_sp.get(), + frame->CalculateTarget().get()); + } break; // The DW_OP_addr_sect_offset4 is used for any location expressions in @@ -2507,7 +2516,14 @@ bool DWARFExpression::Evaluate( uint64_t index = opcodes.GetULEB128(&offset); lldb::addr_t value = dwarf_cu->ReadAddressFromDebugAddrSection(index); stack.push_back(Scalar(value)); - stack.back().SetValueType(Value::ValueType::FileAddress); + if (target && + target->GetArchitecture().GetCore() == ArchSpec::eCore_wasm32) { + // wasm file sections aren't mapped into memory, therefore addresses can + // never point into a file section and are always LoadAddresses. 
+ stack.back().SetValueType(Value::ValueType::LoadAddress); + } else { + stack.back().SetValueType(Value::ValueType::FileAddress); + } } break; // OPCODE: DW_OP_GNU_const_index diff --git a/lldb/source/Host/common/HostInfoBase.cpp b/lldb/source/Host/common/HostInfoBase.cpp index e8088344422a7..9a7b77c19de1d 100644 --- a/lldb/source/Host/common/HostInfoBase.cpp +++ b/lldb/source/Host/common/HostInfoBase.cpp @@ -340,6 +340,7 @@ void HostInfoBase::ComputeHostArchitectureSupport(ArchSpec &arch_32, case llvm::Triple::ppc64le: case llvm::Triple::x86_64: case llvm::Triple::riscv64: + case llvm::Triple::loongarch64: arch_64.SetTriple(triple); arch_32.SetTriple(triple.get32BitArchVariant()); break; diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index eaad0195c1b74..3d0b61fa7d3c3 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -170,6 +170,17 @@ void CommandInterpreter::SetSaveSessionOnQuit(bool enable) { m_collection_sp->SetPropertyAtIndexAsBoolean(nullptr, idx, enable); } +bool CommandInterpreter::GetOpenTranscriptInEditor() const { + const uint32_t idx = ePropertyOpenTranscriptInEditor; + return m_collection_sp->GetPropertyAtIndexAsBoolean( + nullptr, idx, g_interpreter_properties[idx].default_uint_value != 0); +} + +void CommandInterpreter::SetOpenTranscriptInEditor(bool enable) { + const uint32_t idx = ePropertyOpenTranscriptInEditor; + m_collection_sp->SetPropertyAtIndexAsBoolean(nullptr, idx, enable); +} + FileSpec CommandInterpreter::GetSaveSessionDirectory() const { const uint32_t idx = ePropertySaveSessionDirectory; return m_collection_sp->GetPropertyAtIndexAsFileSpec(nullptr, idx); @@ -3226,6 +3237,13 @@ bool CommandInterpreter::SaveTranscript( result.AppendMessageWithFormat("Session's transcripts saved to %s\n", output_file->c_str()); + if (GetOpenTranscriptInEditor() && Host::IsInteractiveGraphicSession()) { + const FileSpec file_spec; 
+ error = file->GetFileSpec(const_cast(file_spec)); + if (error.Success()) + Host::OpenFileInExternalEditor(file_spec, 1); + } + return true; } diff --git a/lldb/source/Interpreter/InterpreterProperties.td b/lldb/source/Interpreter/InterpreterProperties.td index c0acc044fb7fe..2155ee61ccffb 100644 --- a/lldb/source/Interpreter/InterpreterProperties.td +++ b/lldb/source/Interpreter/InterpreterProperties.td @@ -13,6 +13,10 @@ let Definition = "interpreter" in { Global, DefaultFalse, Desc<"If true, LLDB will save the session's transcripts before quitting.">; + def OpenTranscriptInEditor: Property<"open-transcript-in-editor", "Boolean">, + Global, + DefaultTrue, + Desc<"If true, LLDB will open the saved session's transcripts in the external editor.">; def SaveSessionDirectory: Property<"save-session-directory", "FileSpec">, DefaultStringValue<"">, Desc<"A path where LLDB will save the session's transcripts. This is particularly useful when you can't set the session file, for example when using `save-session-on-quit`.">; diff --git a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp index 1d8f3a2750277..c05b43f300fda 100644 --- a/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp +++ b/lldb/source/Plugins/Instruction/RISCV/EmulateInstructionRISCV.cpp @@ -504,7 +504,9 @@ llvm::Optional EmulateInstructionRISCV::Decode(uint32_t inst) { for (const InstrPattern &pat : PATTERNS) { if ((inst & pat.type_mask) == pat.eigen) { - LLDB_LOGF(log, "EmulateInstructionRISCV::%s: inst(%x at %lx) was decoded to %s", + LLDB_LOGF(log, + "EmulateInstructionRISCV::%s: inst(%x at %" PRIx64 + ") was decoded to %s", __FUNCTION__, inst, m_addr, pat.name); auto decoded = is_rvc ? 
pat.decode(try_rvc) : pat.decode(inst); return DecodeResult{decoded, inst, is_rvc, pat}; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index 21b733a62bbbb..9d89148616be1 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -857,21 +857,20 @@ PlatformDarwin::ParseVersionBuildDir(llvm::StringRef dir) { llvm::Expected PlatformDarwin::FetchExtendedCrashInformation(Process &process) { - Log *log = GetLog(LLDBLog::Process); - - StructuredData::ArraySP annotations = ExtractCrashInfoAnnotations(process); - - if (!annotations || !annotations->GetSize()) { - LLDB_LOG(log, "Couldn't extract crash information annotations"); - return nullptr; - } - StructuredData::DictionarySP extended_crash_info = std::make_shared(); - extended_crash_info->AddItem("crash-info annotations", annotations); + StructuredData::ArraySP annotations = ExtractCrashInfoAnnotations(process); + if (annotations && annotations->GetSize()) + extended_crash_info->AddItem("Crash-Info Annotations", annotations); + + StructuredData::DictionarySP app_specific_info = + ExtractAppSpecificInfo(process); + if (app_specific_info && app_specific_info->GetSize()) + extended_crash_info->AddItem("Application Specific Information", + app_specific_info); - return extended_crash_info; + return extended_crash_info->GetSize() ? 
extended_crash_info : nullptr; } StructuredData::ArraySP @@ -978,6 +977,38 @@ PlatformDarwin::ExtractCrashInfoAnnotations(Process &process) { return array_sp; } +StructuredData::DictionarySP +PlatformDarwin::ExtractAppSpecificInfo(Process &process) { + StructuredData::DictionarySP metadata_sp = process.GetMetadata(); + + if (!metadata_sp || !metadata_sp->GetSize() || !metadata_sp->HasKey("asi")) + return {}; + + StructuredData::Dictionary *asi; + if (!metadata_sp->GetValueForKeyAsDictionary("asi", asi)) + return {}; + + StructuredData::DictionarySP dict_sp = + std::make_shared(); + + auto flatten_asi_dict = [&dict_sp](ConstString key, + StructuredData::Object *val) -> bool { + if (!val) + return false; + + StructuredData::Array *arr = val->GetAsArray(); + if (!arr || !arr->GetSize()) + return false; + + dict_sp->AddItem(key.AsCString(), arr->GetItemAtIndex(0)); + return true; + }; + + asi->ForEach(flatten_asi_dict); + + return dict_sp; +} + void PlatformDarwin::AddClangModuleCompilationOptionsForSDKType( Target *target, std::vector &options, XcodeSDK::Type sdk_type) { const std::vector apple_arguments = { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h index 334410e91b4a2..36b52f4ca9eb3 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h @@ -154,6 +154,10 @@ class PlatformDarwin : public PlatformPOSIX { /// \b nullptr if process has no crash information annotations. StructuredData::ArraySP ExtractCrashInfoAnnotations(Process &process); + /// Extract the `Application Specific Information` messages from a crash + /// report. 
+ StructuredData::DictionarySP ExtractAppSpecificInfo(Process &process); + void ReadLibdispatchOffsetsAddress(Process *process); void ReadLibdispatchOffsets(Process *process); diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp index 8ae211f102cb3..3c9cc8e77189e 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp @@ -986,11 +986,8 @@ bool PlatformDarwinKernel::LoadPlatformBinaryAndSetup(Process *process, std::vector PlatformDarwinKernel::GetSupportedArchitectures( const ArchSpec &process_host_arch) { std::vector result; -#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) ARMGetSupportedArchitectures(result); -#else x86GetSupportedArchitectures(result); -#endif return result; } diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp index 11692cbb69d48..e31d8bb769f85 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp @@ -411,7 +411,7 @@ ScriptedProcess::GetLoadedDynamicLibrariesInfos() { StructuredData::ArraySP loaded_images_sp = GetInterface().GetLoadedImages(); if (!loaded_images_sp || !loaded_images_sp->GetSize()) - return GetInterface().ErrorWithMessage( + return ScriptedInterface::ErrorWithMessage( LLVM_PRETTY_FUNCTION, "No loaded images.", error); ModuleList module_list; @@ -477,7 +477,7 @@ ScriptedProcess::GetLoadedDynamicLibrariesInfos() { }; if (!loaded_images_sp->ForEach(reload_image)) - return GetInterface().ErrorWithMessage( + return ScriptedInterface::ErrorWithMessage( LLVM_PRETTY_FUNCTION, "Couldn't reload all images.", error); target.ModulesDidLoad(module_list); @@ -485,6 +485,19 @@ ScriptedProcess::GetLoadedDynamicLibrariesInfos() { return loaded_images_sp; } +lldb_private::StructuredData::DictionarySP 
ScriptedProcess::GetMetadata() { + CheckInterpreterAndScriptObject(); + + StructuredData::DictionarySP metadata_sp = GetInterface().GetMetadata(); + + Status error; + if (!metadata_sp || !metadata_sp->GetSize()) + return ScriptedInterface::ErrorWithMessage( + LLVM_PRETTY_FUNCTION, "No metadata.", error); + + return metadata_sp; +} + ScriptedProcessInterface &ScriptedProcess::GetInterface() const { return m_interpreter->GetScriptedProcessInterface(); } diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h index 465ef7b64ecd7..e8f8dd4a965d5 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h @@ -59,8 +59,6 @@ class ScriptedProcess : public Process { llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } - SystemRuntime *GetSystemRuntime() override { return nullptr; } - Status DoLoadCore() override; Status DoLaunch(Module *exe_module, ProcessLaunchInfo &launch_info) override; @@ -88,6 +86,8 @@ class ScriptedProcess : public Process { lldb_private::StructuredData::ObjectSP GetLoadedDynamicLibrariesInfos() override; + lldb_private::StructuredData::DictionarySP GetMetadata() override; + protected: ScriptedProcess(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp, const ScriptedProcess::ScriptedProcessInfo &launch_info, diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp index c655ec12ecda3..f13cdd3a4c33c 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp @@ -171,9 +171,7 @@ bool ScriptedThread::LoadArtificialStackFrames() { StackFrameListSP frames = GetStackFrameList(); for (size_t idx = 0; idx < arr_size; idx++) { - StructuredData::Dictionary *dict; - if (!arr_sp->GetItemAtIndexAsDictionary(idx, dict) || !dict) return 
ScriptedInterface::ErrorWithMessage( LLVM_PRETTY_FUNCTION, @@ -334,11 +332,10 @@ std::shared_ptr ScriptedThread::GetDynamicRegisterInfo() { Status error; if (!reg_info) - return GetInterface() - ->ErrorWithMessage>( - LLVM_PRETTY_FUNCTION, - "Failed to get scripted thread registers info.", error, - LLDBLog::Thread); + return ScriptedInterface::ErrorWithMessage< + std::shared_ptr>( + LLVM_PRETTY_FUNCTION, "Failed to get scripted thread registers info.", + error, LLDBLog::Thread); m_register_info_sp = std::make_shared( *reg_info, m_scripted_process.GetTarget().GetArchitecture()); @@ -346,3 +343,16 @@ std::shared_ptr ScriptedThread::GetDynamicRegisterInfo() { return m_register_info_sp; } + +StructuredData::ObjectSP ScriptedThread::FetchThreadExtendedInfo() { + CheckInterpreterAndScriptObject(); + + Status error; + StructuredData::ArraySP extended_info_sp = GetInterface()->GetExtendedInfo(); + + if (!extended_info_sp || !extended_info_sp->GetSize()) + return ScriptedInterface::ErrorWithMessage( + LLVM_PRETTY_FUNCTION, "No extended information found", error); + + return extended_info_sp; +} diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h index 959f498edf240..cd224d60ceef8 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedThread.h +++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h @@ -58,6 +58,8 @@ class ScriptedThread : public lldb_private::Thread { void ClearStackFrames() override; + StructuredData::ObjectSP FetchThreadExtendedInfo() override; + private: void CheckInterpreterAndScriptObject() const; lldb::ScriptedThreadInterfaceSP GetInterface() const; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp index 576bf69c9258e..ffce8c468cab8 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp +++ 
b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp @@ -177,4 +177,15 @@ ScriptedProcessPythonInterface::CreateScriptedThreadInterface() { return std::make_shared(m_interpreter); } +StructuredData::DictionarySP ScriptedProcessPythonInterface::GetMetadata() { + Status error; + StructuredData::DictionarySP dict = + Dispatch("get_process_metadata", error); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error)) + return {}; + + return dict; +} + #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h index 7f458b1dd9bdb..622d225853040 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h @@ -57,6 +57,8 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface, llvm::Optional GetScriptedThreadPluginName() override; + StructuredData::DictionarySP GetMetadata() override; + private: lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override; }; diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp index 3ff592fb83cd7..d52a9c2d81f97 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp @@ -144,4 +144,15 @@ ScriptedThreadPythonInterface::GetRegisterContext() { return obj->GetAsString()->GetValue().str(); } +StructuredData::ArraySP ScriptedThreadPythonInterface::GetExtendedInfo() { + Status error; + StructuredData::ArraySP arr = + Dispatch("get_extended_info", error); + + if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, arr, error)) + return {}; + + return arr; +} + #endif diff --git 
a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h index 59bb182ae3f3d..63ce1c1ab288f 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h @@ -42,6 +42,8 @@ class ScriptedThreadPythonInterface : public ScriptedThreadInterface, StructuredData::DictionarySP GetRegisterInfo() override; llvm::Optional GetRegisterContext() override; + + StructuredData::ArraySP GetExtendedInfo() override; }; } // namespace lldb_private diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 5d5a47bc0c92c..066fc9f434cae 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2389,6 +2389,24 @@ void SymbolFileDWARF::FindFunctions(const Module::LookupInfo &lookup_info, ResolveFunction(die, include_inlines, sc_list); return true; }); + // With -gsimple-template-names, a templated type's DW_AT_name will not + // contain the template parameters. Try again stripping '<' and anything + // after, filtering out entries with template parameters that don't match. 
+ { + const llvm::StringRef name_ref = name.GetStringRef(); + auto it = name_ref.find('<'); + if (it != llvm::StringRef::npos) { + const llvm::StringRef name_no_template_params = name_ref.slice(0, it); + + Module::LookupInfo no_tp_lookup_info(lookup_info); + no_tp_lookup_info.SetLookupName(ConstString(name_no_template_params)); + m_index->GetFunctions(no_tp_lookup_info, *this, parent_decl_ctx, [&](DWARFDIE die) { + if (resolved_dies.insert(die.GetDIE()).second) + ResolveFunction(die, include_inlines, sc_list); + return true; + }); + } + } // Return the number of variable that were appended to the list const uint32_t num_matches = sc_list.GetSize() - original_size; diff --git a/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp b/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp index 7a56264f87c9b..d4d164a77d732 100644 --- a/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp +++ b/lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.cpp @@ -502,6 +502,46 @@ ThreadSP SystemRuntimeMacOSX::GetExtendedBacktraceThread(ThreadSP real_thread, m_page_to_free_size = ret.item_buffer_size; } } + } else if (type == "Application Specific Backtrace") { + StructuredData::ObjectSP thread_extended_sp = + real_thread->GetExtendedInfo(); + + if (!thread_extended_sp) + return {}; + + StructuredData::Array *thread_extended_info = + thread_extended_sp->GetAsArray(); + + if (!thread_extended_info || !thread_extended_info->GetSize()) + return {}; + + std::vector app_specific_backtrace_pcs; + + auto extract_frame_pc = + [&app_specific_backtrace_pcs](StructuredData::Object *obj) -> bool { + if (!obj) + return false; + + StructuredData::Dictionary *dict = obj->GetAsDictionary(); + if (!dict) + return false; + + lldb::addr_t pc = LLDB_INVALID_ADDRESS; + if (!dict->GetValueForKeyAsInteger("pc", pc)) + return false; + + app_specific_backtrace_pcs.push_back(pc); + + return pc != LLDB_INVALID_ADDRESS; + }; + + if 
(!thread_extended_info->ForEach(extract_frame_pc)) + return {}; + + originating_thread_sp = + std::make_shared(*m_process, real_thread->GetIndexID(), + app_specific_backtrace_pcs, true); + originating_thread_sp->SetQueueName(type.AsCString()); } return originating_thread_sp; } @@ -674,6 +714,7 @@ const std::vector & SystemRuntimeMacOSX::GetExtendedBacktraceTypes() { if (m_types.size() == 0) { m_types.push_back(ConstString("libdispatch")); + m_types.push_back(ConstString("Application Specific Backtrace")); // We could have pthread as another type in the future if we have a way of // gathering that information & it's useful to distinguish between them. } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 5175ad81606d1..cd142b73ab824 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -9221,14 +9221,8 @@ void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type, if (level == eDescriptionLevelVerbose) record_decl->dump(llvm_ostrm); else { - if (auto *cxx_record_decl = - llvm::dyn_cast(record_decl)) - cxx_record_decl->print(llvm_ostrm, - getASTContext().getPrintingPolicy(), - s->GetIndentLevel()); - else - record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(), - s->GetIndentLevel()); + record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(), + s->GetIndentLevel()); } } break; diff --git a/lldb/source/Symbol/SymbolFileOnDemand.cpp b/lldb/source/Symbol/SymbolFileOnDemand.cpp index b4c9ed002a8ea..737cb1042ca76 100644 --- a/lldb/source/Symbol/SymbolFileOnDemand.cpp +++ b/lldb/source/Symbol/SymbolFileOnDemand.cpp @@ -274,6 +274,15 @@ SymbolFileOnDemand::ResolveSymbolContext(const Address &so_addr, return m_sym_file_impl->ResolveSymbolContext(so_addr, resolve_scope, sc); } +Status SymbolFileOnDemand::CalculateFrameVariableError(StackFrame &frame) { + if 
(!m_debug_info_enabled) { + LLDB_LOG(GetLog(), "[{0}] {1} is skipped", GetSymbolFileName(), + __FUNCTION__); + return Status(); + } + return m_sym_file_impl->CalculateFrameVariableError(frame); +} + uint32_t SymbolFileOnDemand::ResolveSymbolContext( const SourceLocationSpec &src_location_spec, SymbolContextItem resolve_scope, SymbolContextList &sc_list) { diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp index 412373533aaba..ae5ae5cbd659a 100644 --- a/lldb/source/Symbol/TypeSystem.cpp +++ b/lldb/source/Symbol/TypeSystem.cpp @@ -178,6 +178,10 @@ TypeSystem::CreateUtilityFunction(std::string text, std::string name) { return {}; } +llvm::Optional TypeSystem::ReportStatistics() { + return llvm::None; +} + #pragma mark TypeSystemMap TypeSystemMap::TypeSystemMap() : m_mutex(), m_map() {} diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp index 0ea09743d1300..118d6c396172c 100644 --- a/lldb/source/Target/Statistics.cpp +++ b/lldb/source/Target/Statistics.cpp @@ -75,6 +75,17 @@ json::Value ModuleStats::ToJSON() const { symfile_ids.emplace_back(symfile_id); module.try_emplace("symbolFileModuleIdentifiers", std::move(symfile_ids)); } + + if (!type_system_stats.empty()) { + json::Array type_systems; + for (const auto &entry : type_system_stats) { + json::Object obj; + obj.try_emplace(entry.first().str(), entry.second); + type_systems.emplace_back(std::move(obj)); + } + module.try_emplace("typeSystemInfo", std::move(type_systems)); + } + return module; } @@ -256,6 +267,11 @@ llvm::json::Value DebuggerStats::ReportStatistics(Debugger &debugger, debug_parse_time += module_stat.debug_parse_time; debug_index_time += module_stat.debug_index_time; debug_info_size += module_stat.debug_info_size; + module->ForEachTypeSystem([&](TypeSystem *ts) { + if (auto stats = ts->ReportStatistics()) + module_stat.type_system_stats.insert({ts->GetPluginName(), *stats}); + return true; + }); 
json_modules.emplace_back(module_stat.ToJSON()); } diff --git a/lldb/source/Target/Target.cpp b/lldb/source/Target/Target.cpp index f1a311b7252cb..33a792b683ca4 100644 --- a/lldb/source/Target/Target.cpp +++ b/lldb/source/Target/Target.cpp @@ -1436,7 +1436,8 @@ void Target::SetExecutableModule(ModuleSP &executable_sp, if (!m_arch.GetSpec().IsValid()) { m_arch = executable_sp->GetArchitecture(); LLDB_LOG(log, - "setting architecture to {0} ({1}) based on executable file", + "Target::SetExecutableModule setting architecture to {0} ({1}) " + "based on executable file", m_arch.GetSpec().GetArchitectureName(), m_arch.GetSpec().GetTriple().getTriple()); } @@ -1536,7 +1537,9 @@ bool Target::SetArchitecture(const ArchSpec &arch_spec, bool set_platform, // specified if (replace_local_arch) m_arch = other; - LLDB_LOG(log, "set architecture to {0} ({1})", + LLDB_LOG(log, + "Target::SetArchitecture merging compatible arch; arch " + "is now {0} ({1})", m_arch.GetSpec().GetArchitectureName(), m_arch.GetSpec().GetTriple().getTriple()); return true; @@ -1544,9 +1547,13 @@ bool Target::SetArchitecture(const ArchSpec &arch_spec, bool set_platform, // If we have an executable file, try to reset the executable to the desired // architecture - LLDB_LOGF(log, "Target::SetArchitecture changing architecture to %s (%s)", - arch_spec.GetArchitectureName(), - arch_spec.GetTriple().getTriple().c_str()); + LLDB_LOGF( + log, + "Target::SetArchitecture changing architecture to %s (%s) from %s (%s)", + arch_spec.GetArchitectureName(), + arch_spec.GetTriple().getTriple().c_str(), + m_arch.GetSpec().GetArchitectureName(), + m_arch.GetSpec().GetTriple().getTriple().c_str()); m_arch = other; ModuleSP executable_sp = GetExecutableModule(); diff --git a/lldb/source/Utility/StructuredData.cpp b/lldb/source/Utility/StructuredData.cpp index fc10fa539e9e5..acc09289e6b98 100644 --- a/lldb/source/Utility/StructuredData.cpp +++ b/lldb/source/Utility/StructuredData.cpp @@ -50,6 +50,11 @@ 
StructuredData::ParseJSONFromFile(const FileSpec &input_spec, Status &error) { return StructuredData::ObjectSP(); } +bool StructuredData::IsRecordType(const ObjectSP object_sp) { + return object_sp->GetType() == lldb::eStructuredDataTypeArray || + object_sp->GetType() == lldb::eStructuredDataTypeDictionary; +} + static StructuredData::ObjectSP ParseJSONValue(json::Value &value) { if (json::Object *O = value.getAsObject()) return ParseJSONObject(O); @@ -69,6 +74,9 @@ static StructuredData::ObjectSP ParseJSONValue(json::Value &value) { if (auto d = value.getAsNumber()) return std::make_shared(*d); + if (auto n = value.getAsNull()) + return std::make_shared(); + return StructuredData::ObjectSP(); } @@ -172,3 +180,98 @@ void StructuredData::Null::Serialize(json::OStream &s) const { void StructuredData::Generic::Serialize(json::OStream &s) const { s.value(llvm::formatv("{0:X}", m_object)); } + +void StructuredData::Integer::GetDescription(lldb_private::Stream &s) const { + s.Printf("%" PRId64, static_cast(m_value)); +} + +void StructuredData::Float::GetDescription(lldb_private::Stream &s) const { + s.Printf("%f", m_value); +} + +void StructuredData::Boolean::GetDescription(lldb_private::Stream &s) const { + s.Printf(m_value ? "True" : "False"); +} + +void StructuredData::String::GetDescription(lldb_private::Stream &s) const { + s.Printf("%s", m_value.empty() ? "\"\"" : m_value.c_str()); +} + +void StructuredData::Array::GetDescription(lldb_private::Stream &s) const { + size_t index = 0; + size_t indentation_level = s.GetIndentLevel(); + for (const auto &item_sp : m_items) { + // Sanitize. + if (!item_sp) + continue; + + // Reset original indentation level. + s.SetIndentLevel(indentation_level); + s.Indent(); + + // Print key + s.Printf("[%zu]:", index++); + + // Return to new line and increase indentation if value is record type. + // Otherwise add spacing. 
+ bool should_indent = IsRecordType(item_sp); + if (should_indent) { + s.EOL(); + s.IndentMore(); + } else { + s.PutChar(' '); + } + + // Print value and new line if not last pair. + item_sp->GetDescription(s); + if (item_sp != *(--m_items.end())) + s.EOL(); + + // Reset indentation level if it was incremented previously. + if (should_indent) + s.IndentLess(); + } +} + +void StructuredData::Dictionary::GetDescription(lldb_private::Stream &s) const { + size_t indentation_level = s.GetIndentLevel(); + for (const auto &pair : m_dict) { + // Sanitize. + if (pair.first.IsNull() || pair.first.IsEmpty() || !pair.second) + continue; + + // Reset original indentation level. + s.SetIndentLevel(indentation_level); + s.Indent(); + + // Print key. + s.Printf("%s:", pair.first.AsCString()); + + // Return to new line and increase indentation if value is record type. + // Otherwise add spacing. + bool should_indent = IsRecordType(pair.second); + if (should_indent) { + s.EOL(); + s.IndentMore(); + } else { + s.PutChar(' '); + } + + // Print value and new line if not last pair. + pair.second->GetDescription(s); + if (pair != *(--m_dict.end())) + s.EOL(); + + // Reset indentation level if it was incremented previously. 
+ if (should_indent) + s.IndentLess(); + } +} + +void StructuredData::Null::GetDescription(lldb_private::Stream &s) const { + s.Printf("NULL"); +} + +void StructuredData::Generic::GetDescription(lldb_private::Stream &s) const { + s.Printf("%p", m_object); +} diff --git a/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py b/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py index 6c86f5016a606..1dedc5d7f9bbd 100644 --- a/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py +++ b/lldb/test/API/functionalities/breakpoint/cpp/TestCPPBreakpointLocations.py @@ -12,7 +12,16 @@ class TestCPPBreakpointLocations(TestBase): @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24764") def test(self): - self.build() + self.do_test(dict()) + + @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr24764") + @skipIf(compiler=no_match("clang")) + @skipIf(compiler_version=["<", "15.0"]) + def test_simple_template_names(self): + self.do_test(dict(CFLAGS_EXTRAS="-gsimple-template-names")) + + def do_test(self, debug_flags): + self.build(dictionary=debug_flags) self.breakpoint_id_tests() def verify_breakpoint_locations(self, target, bp_dict): @@ -57,7 +66,11 @@ def breakpoint_id_tests(self): # Template cases {'name': 'func', 'loc_names': []}, + {'name': 'Foo::func', 'loc_names': []}, + {'name': 'ns::Foo::func', 'loc_names': []}, {'name': 'func', 'loc_names': ['auto ns::Foo::func()']}, + {'name': 'Foo::func', 'loc_names': ['auto ns::Foo::func()']}, + {'name': 'ns::Foo::func', 'loc_names': ['auto ns::Foo::func()']}, {'name': 'func', 'loc_names': ['auto ns::Foo::func()', 'auto ns::Foo::func>()']}, @@ -71,6 +84,15 @@ def breakpoint_id_tests(self): {'name': 'operator<<', 'loc_names': ['void ns::Foo::operator<<(int)']}, {'name': 'ns::Foo::operator<<', 'loc_names': ['void ns::Foo::operator<<(int)', 'void ns::Foo::operator<<>(ns::Foo)']}, + + {'name': 'g', 'loc_names': []}, + {'name': 'g', 'loc_names': 
['void ns::g()']}, + {'name': 'g', 'loc_names': ['void ns::g()']}, + {'name': 'g', 'loc_names': ['void ns::g()', 'void ns::g()']}, + {'name': 'ns::g', 'loc_names': []}, + {'name': 'ns::g', 'loc_names': ['void ns::g()']}, + {'name': 'ns::g', 'loc_names': ['void ns::g()']}, + {'name': 'ns::g', 'loc_names': ['void ns::g()', 'void ns::g()']}, ] for bp_dict in bp_dicts: diff --git a/lldb/test/API/functionalities/breakpoint/cpp/main.cpp b/lldb/test/API/functionalities/breakpoint/cpp/main.cpp index 7ee61e92ffd57..b2cee995198ad 100644 --- a/lldb/test/API/functionalities/breakpoint/cpp/main.cpp +++ b/lldb/test/API/functionalities/breakpoint/cpp/main.cpp @@ -94,6 +94,8 @@ template struct Foo { template void operator<<(T t) {} }; + +template void g() {} } // namespace ns int main (int argc, char const *argv[]) @@ -123,5 +125,8 @@ int main (int argc, char const *argv[]) f.operator<<(5); f.operator<< >({}); + ns::g(); + ns::g(); + return 0; } diff --git a/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py b/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py index 30190e7c4df9b..c0d380aca2849 100644 --- a/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py +++ b/lldb/test/API/functionalities/process_crash_info/TestProcessCrashInfo.py @@ -37,7 +37,9 @@ def test_cli(self): patterns=["Process .* launched: .*a.out"]) self.expect('process status --verbose', - patterns=["\"message\".*pointer being freed was not allocated"]) + patterns=["Extended Crash Information", + "Crash-Info Annotations", + "pointer being freed was not allocated"]) @skipIfAsan # The test process intentionally hits a memory bug. 
diff --git a/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py b/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py index 9b9195561606b..a6a7e159169b6 100644 --- a/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py +++ b/lldb/test/API/tools/lldb-vscode/variables/TestVSCode_variables.py @@ -85,6 +85,40 @@ def verify_variables(self, verify_dict, variables, varref_dict=None): 'variable "%s" in verify dictionary' % (name)) self.verify_values(verify_dict[name], variable, varref_dict) + def darwin_dwarf_missing_obj(self, initCommands): + self.build(debug_info="dwarf") + program = self.getBuildArtifact("a.out") + main_obj = self.getBuildArtifact("main.o") + self.assertTrue(os.path.exists(main_obj)) + # Delete the main.o file that contains the debug info so we force an + # error when we run to main and try to get variables + os.unlink(main_obj) + + self.create_debug_adaptor() + self.assertTrue(os.path.exists(program), 'executable must exist') + + self.launch(program=program, + initCommands=initCommands) + + functions = ['main'] + breakpoint_ids = self.set_function_breakpoints(functions) + self.assertEquals(len(breakpoint_ids), len(functions), "expect one breakpoint") + self.continue_to_breakpoints(breakpoint_ids) + + locals = self.vscode.get_local_variables() + + verify_locals = { + '': { + 'equals': {'type': 'const char *'}, + 'contains': { 'value': [ + 'debug map object file ', + 'main.o" containing debug info does not exist, debug info will not be loaded'] + } + }, + } + varref_dict = {} + self.verify_variables(verify_locals, locals, varref_dict) + @skipIfWindows @skipIfRemote def test_scopes_variables_setVariable_evaluate(self): @@ -529,33 +563,16 @@ def test_darwin_dwarf_missing_obj(self): changing compiler options and are designed to give better feedback to the user. 
''' - self.build(debug_info="dwarf") - program = self.getBuildArtifact("a.out") - main_obj = self.getBuildArtifact("main.o") - self.assertTrue(os.path.exists(main_obj)) - # Delete the main.o file that contains the debug info so we force an - # error when we run to main and try to get variables - os.unlink(main_obj) - - self.create_debug_adaptor() - self.assertTrue(os.path.exists(program), 'executable must exist') - self.launch(program) - - functions = ['main'] - breakpoint_ids = self.set_function_breakpoints(functions) - self.assertEquals(len(breakpoint_ids), len(functions), "expect one breakpoint") - self.continue_to_breakpoints(breakpoint_ids) + self.darwin_dwarf_missing_obj(None) - locals = self.vscode.get_local_variables() - verify_locals = { - '': { - 'equals': {'type': 'const char *'}, - 'contains': { 'value': [ - 'debug map object file ', - 'main.o" containing debug info does not exist, debug info will not be loaded'] - } - }, - } - varref_dict = {} - self.verify_variables(verify_locals, locals, varref_dict) + @no_debug_info_test + @skipUnlessDarwin + def test_darwin_dwarf_missing_obj_with_symbol_ondemand_enabled(self): + ''' + Test that if we build a binary with DWARF in .o files and we remove + the .o file for main.cpp, that we get a variable named "" + whose value matches the appropriate error. Test with symbol_ondemand_enabled. 
+ ''' + initCommands = ['settings set symbols.load-on-demand true'] + self.darwin_dwarf_missing_obj(initCommands) diff --git a/lldb/test/Shell/Diagnostics/TestDump.test b/lldb/test/Shell/Diagnostics/TestDump.test index 9bb34aafc8c3a..2adde6b86d35a 100644 --- a/lldb/test/Shell/Diagnostics/TestDump.test +++ b/lldb/test/Shell/Diagnostics/TestDump.test @@ -12,4 +12,4 @@ # RUN: %lldb -o 'diagnostics dump -d %t.nonexisting' # RUN: file %t.nonexisting | FileCheck %s -# CHECK: : directory +# CHECK: directory diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash index c02150c7f15a9..4361ed5020028 100644 --- a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/a.out.crash @@ -29,7 +29,7 @@ Terminating Process: exc handler [21606] Thread 0 Crashed:: Dispatch queue: com.apple.main-thread 0 a.out @foo@ foo + 16 (test.c:3) -1 a.out @bar@ bar + 9 (test.c:6) +1 a.out @bar@ 2 a.out @main@ main + 20 (test.c:8) 3 libdyld.dylib 0x1000000 start + 1 @@ -47,3 +47,4 @@ Trap Number: 14 Binary Images: 0x100000000 - 0x200000000 +a.out (0) <@UUID@> @EXEC@ + 0x0 - 0xffffffffffffffff ??? (*) <00000000-0000-0000-0000-000000000000> ??? 
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.ips b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.ips new file mode 100644 index 0000000000000..8d151a3be0370 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.ips @@ -0,0 +1,131 @@ +{"app_name":"asi","timestamp":"2022-10-07 11:31:53.00 -0700","app_version":"","slice_uuid":"2cee52c2-2d9c-3e64-bdd0-c43ccd1b37ec","build_version":"","platform":1,"share_with_app_devs":0,"is_first_party":1,"bug_type":"309","os_version":"macOS 13.0","roots_installed":0,"incident_id":"E62DF457-8BBC-4E92-AECA-11D1B55246E3","name":"asi"} +{ + "uptime" : 90000, + "procRole" : "Unspecified", + "version" : 2, + "userID" : 501, + "deployVersion" : 210, + "modelCode" : "Mac13,1", + "coalitionID" : 495, + "osVersion" : { + "train" : "macOS 13.0", + "build" : "", + "releaseType" : "" + }, + "captureTime" : "2022-10-07 11:31:52.6211 -0700", + "incident" : "E62DF457-8BBC-4E92-AECA-11D1B55246E3", + "pid" : 96535, + "translated" : false, + "cpuType" : "ARM-64", + "roots_installed" : 0, + "bug_type" : "309", + "procLaunch" : "2022-10-07 11:31:52.4969 -0700", + "procStartAbsTime" : 2167631132529, + "procExitAbsTime" : 2167634104978, + "procName" : "asi", + "procPath" : "\/Users\/USER\/*\/asi", + "parentProc" : "zsh", + "parentPid" : 96199, + "coalitionName" : "com.apple.Terminal", + "crashReporterKey" : "533C17C1-DBB1-4134-1FDE-68346F18AAA2", + "responsiblePid" : 615, + "responsibleProc" : "Terminal", + "wakeTime" : 1351, + "sleepWakeUUID" : "AD23E0A0-A4A5-4B6B-925F-2FC3665C17BF", + "sip" : "enabled", + "exception" : {"codes":"0x0000000000000000, 0x0000000000000000","rawCodes":[0,0],"type":"EXC_CRASH","signal":"SIGABRT"}, + "asi" : {"CoreFoundation":["*** Terminating app due to uncaught exception 'NSRangeException', reason: '*** __boundsFail: index 10 beyond bounds [0 .. 
3]'"],"libsystem_c.dylib":["abort() called"],"libc++abi.dylib":["terminating with uncaught exception of type NSException"]}, + "asiBacktraces" : ["0 CoreFoundation 0x00000001a0a58418 __exceptionPreprocess + 176\n1 libobjc.A.dylib 0x00000001a05a2ea8 objc_exception_throw + 60\n2 CoreFoundation 0x00000001a0b3dcc4 -[__NSCFString characterAtIndex:].cold.1 + 0\n3 CoreFoundation 0x00000001a0b46af4 -[__NSArrayI getObjects:range:].cold.1 + 0\n4 CoreFoundation 0x00000001a09a12a4 __CFPropertyListIsArrayPlistAux + 0\n5 asi 0x00000001047e3ed0 main + 128\n6 dyld 0x00000001a05d3e50 start + 2544"], + "extMods" : {"caller":{"thread_create":0,"thread_set_state":0,"task_for_pid":0},"system":{"thread_create":0,"thread_set_state":4,"task_for_pid":4},"targeted":{"thread_create":0,"thread_set_state":0,"task_for_pid":0},"warnings":0}, + "lastExceptionBacktrace" : [{"imageOffset":1033228,"symbol":"__exceptionPreprocess","symbolLocation":164,"imageIndex":5},{"imageOffset":110248,"symbol":"objc_exception_throw","symbolLocation":60,"imageIndex":4},{"imageOffset":1973444,"symbol":"-[__NSCFString characterAtIndex:].cold.1","symbolLocation":0,"imageIndex":5},{"imageOffset":2009844,"symbol":"-[__NSArrayI getObjects:range:].cold.1","symbolLocation":0,"imageIndex":5},{"imageOffset":283300,"symbol":"__CFPropertyListIsArrayPlistAux","symbolLocation":0,"imageIndex":5},{"imageOffset":16080,"symbol":"main","symbolLocation":128,"imageIndex":6},{"imageOffset":24144,"symbol":"start","symbolLocation":2544,"imageIndex":7}], + "faultingThread" : 0, + "threads" : 
[{"triggered":true,"id":1767667,"threadState":{"x":[{"value":0},{"value":0},{"value":0},{"value":0},{"value":6988476661},{"value":6096540848},{"value":110},{"value":512},{"value":502518818286880576},{"value":502518810403597248},{"value":512},{"value":11},{"value":11},{"value":2095104},{"value":2043},{"value":2195963912},{"value":328},{"value":8604857144},{"value":0},{"value":6},{"value":8522738816,"symbolLocation":0,"symbol":"_main_thread"},{"value":259},{"value":8522739040,"symbolLocation":224,"symbol":"_main_thread"},{"value":105553117118464},{"value":8528036928,"symbolLocation":0,"symbol":"gProcessInfo"},{"value":0},{"value":0},{"value":0},{"value":0}],"flavor":"ARM_THREAD_STATE64","lr":{"value":6988750060},"cpsr":{"value":1073745920},"fp":{"value":6096540704},"sp":{"value":6096540672},"esr":{"value":1442840704,"description":" Address size fault"},"pc":{"value":6988526116,"matchesCrashFrame":1},"far":{"value":5452680264}},"queue":"com.apple.main-thread","frames":[{"imageOffset":37412,"symbol":"__pthread_kill","symbolLocation":8,"imageIndex":0},{"imageOffset":27884,"symbol":"pthread_kill","symbolLocation":288,"imageIndex":1},{"imageOffset":496328,"symbol":"abort","symbolLocation":180,"imageIndex":2},{"imageOffset":72472,"symbol":"abort_message","symbolLocation":132,"imageIndex":3},{"imageOffset":6668,"symbol":"demangling_terminate_handler()","symbolLocation":336,"imageIndex":3},{"imageOffset":145252,"symbol":"_objc_terminate()","symbolLocation":144,"imageIndex":4},{"imageOffset":69300,"symbol":"std::__terminate(void 
(*)())","symbolLocation":20,"imageIndex":3},{"imageOffset":80940,"symbol":"__cxxabiv1::failed_throw(__cxxabiv1::__cxa_exception*)","symbolLocation":36,"imageIndex":3},{"imageOffset":80856,"symbol":"__cxa_throw","symbolLocation":140,"imageIndex":3},{"imageOffset":110600,"symbol":"objc_exception_throw","symbolLocation":412,"imageIndex":4},{"imageOffset":1973444,"symbol":"_CFThrowFormattedException","symbolLocation":108,"imageIndex":5},{"imageOffset":2009844,"symbol":"__boundsFail","symbolLocation":92,"imageIndex":5},{"imageOffset":283300,"symbol":"-[__NSArrayI objectAtIndex:]","symbolLocation":60,"imageIndex":5},{"imageOffset":16080,"symbol":"main","symbolLocation":128,"imageIndex":6},{"imageOffset":24144,"symbol":"start","symbolLocation":2544,"imageIndex":7}]}], + "usedImages" : [ + { + "source" : "P", + "arch" : "arm64e", + "base" : 6988488704, + "size" : 233468, + "uuid" : "15147572-bf8d-359e-a6bb-97f4489e7f78", + "path" : "\/usr\/lib\/system\/libsystem_kernel.dylib", + "name" : "libsystem_kernel.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6988722176, + "size" : 53244, + "uuid" : "19a65066-147a-37e1-be56-bd78821ef285", + "path" : "\/usr\/lib\/system\/libsystem_pthread.dylib", + "name" : "libsystem_pthread.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6987440128, + "size" : 528372, + "uuid" : "cd2fafb3-239f-3929-9b9d-ed1768c25159", + "path" : "\/usr\/lib\/system\/libsystem_c.dylib", + "name" : "libsystem_c.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6988390400, + "size" : 98300, + "uuid" : "88025d90-bb66-34a8-8628-91ec5b3fb900", + "path" : "\/usr\/lib\/libc++abi.dylib", + "name" : "libc++abi.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6985121792, + "size" : 286112, + "uuid" : "9a019b6d-aeb6-3a3e-9c74-717c18dd5d43", + "path" : "\/usr\/lib\/libobjc.A.dylib", + "name" : "libobjc.A.dylib" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6989135872, + 
"CFBundleShortVersionString" : "6.9", + "CFBundleIdentifier" : "com.apple.CoreFoundation", + "size" : 5079040, + "uuid" : "0cb1d6ec-b4ee-33d5-9828-29db31cad6fc", + "path" : "\/System\/Library\/Frameworks\/CoreFoundation.framework\/Versions\/A\/CoreFoundation", + "name" : "CoreFoundation", + "CFBundleVersion" : "1953.1" + }, + { + "source" : "P", + "arch" : "arm64", + "base" : 4370333696, + "size" : 16384, + "uuid" : "2cee52c2-2d9c-3e64-bdd0-c43ccd1b37ec", + "path" : "\/Users\/USER\/*\/asi", + "name" : "asi" + }, + { + "source" : "P", + "arch" : "arm64e", + "base" : 6985408512, + "size" : 566452, + "uuid" : "0d973234-ed2d-3a07-889a-46b424e29ae0", + "path" : "\/usr\/lib\/dyld", + "name" : "dyld" + } +], + "sharedCache" : { + "base" : 6984761344, + "size" : 3405660160, + "uuid" : "5fe7ffdc-ba32-33ba-8827-d3d9094c6bc3" +}, + "vmSummary" : "ReadOnly portion of Libraries: Total=861.7M resident=0K(0%) swapped_out_or_unallocated=861.7M(100%)\nWritable regions: Total=666.4M written=0K(0%) resident=0K(0%) swapped_out=0K(0%) unallocated=666.4M(100%)\n\n VIRTUAL REGION \nREGION TYPE SIZE COUNT (non-coalesced) \n=========== ======= ======= \nActivity Tracing 256K 1 \nKernel Alloc Once 32K 1 \nMALLOC 154.2M 14 \nMALLOC guard page 96K 5 \nMALLOC_MEDIUM (reserved) 120.0M 1 reserved VM address space (unallocated)\nMALLOC_NANO (reserved) 384.0M 1 reserved VM address space (unallocated)\nSTACK GUARD 56.0M 1 \nStack 8176K 1 \n__AUTH 307K 58 \n__AUTH_CONST 3560K 142 \n__DATA 1494K 136 \n__DATA_CONST 3988K 144 \n__DATA_DIRTY 361K 58 \n__LINKEDIT 763.4M 2 \n__OBJC_CONST 289K 36 \n__OBJC_RO 65.1M 1 \n__OBJC_RW 1983K 1 \n__TEXT 98.3M 151 \ndyld private memory 256K 1 \nshared memory 80K 4 \n=========== ======= ======= \nTOTAL 1.6G 759 \nTOTAL, minus reserved VM space 1.1G 759 \n", + "legacyInfo" : { + "threadTriggered" : { + "queue" : "com.apple.main-thread" + } +} +} diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.yaml 
b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.yaml new file mode 100644 index 0000000000000..31042daadd8a9 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/asi.yaml @@ -0,0 +1,392 @@ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 21 + sizeofcmds: 1864 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 552 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 6 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100003E50 + size: 172 + offset: 0x3E50 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: FF8301D1FD7B05A9FD43019108008052A8431EB8BFC31FB8A0831FB8A1031FF829000094E11340F9E01700F9280000B0000940F9E8030091090000B029010191090100F9090000B029810191090500F9090000B029010291090900F91F0D00F9020000B0428000911A000094E11340F9A0831EF8A0835EF8420180D21D000094E8030091000100F9000000B00080029107000094E01740F908000094A0435EB8FD7B45A9FF830191C0035FD6 + - sectname: __stubs + segname: __TEXT + addr: 0x100003EFC + size: 36 + offset: 0x3EFC + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000408 + reserved1: 0x0 + reserved2: 0xC + reserved3: 0x0 + content: 100000B0100240F900021FD6100000B0100640F900021FD6100000B0100A40F900021FD6 + - sectname: __objc_stubs + segname: __TEXT + addr: 0x100003F20 + size: 64 + offset: 0x3F20 + align: 5 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 210000B0210040F9100000B0100E40F900021FD6200020D4200020D4200020D4210000B0210440F9100000B0100E40F900021FD6200020D4200020D4200020D4 + - 
sectname: __cstring + segname: __TEXT + addr: 0x100003F60 + size: 26 + offset: 0x3F60 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x2 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 4A696D004A61736F6E004A6F6E61730049736D61696C00254000 + - sectname: __objc_methname + segname: __TEXT + addr: 0x100003F7A + size: 34 + offset: 0x3F7A + align: 1 + reloff: 0x0 + nreloc: 0 + flags: 0x2 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 6F626A6563744174496E6465783A00006172726179576974684F626A656374733A00 + - sectname: __unwind_info + segname: __TEXT + addr: 0x100003F9C + size: 72 + offset: 0x3F9C + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 010000001C000000000000001C000000000000001C00000002000000503E00003400000034000000FD3E00000000000034000000030000000C000100100001000000000000000004 + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: __DATA_CONST + vmaddr: 4294983680 + vmsize: 16384 + fileoff: 16384 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 3 + flags: 16 + Sections: + - sectname: __got + segname: __DATA_CONST + addr: 0x100004000 + size: 32 + offset: 0x4000 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x6 + reserved1: 0x3 + reserved2: 0x0 + reserved3: 0x0 + content: '0000000000001080010000000000108002000000000010800300000000001080' + - sectname: __cfstring + segname: __DATA_CONST + addr: 0x100004020 + size: 160 + offset: 0x4020 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 0400000000002080C807000000000000603F00000000200003000000000000000400000000002080C807000000000000643F00000000200005000000000000000400000000002080C8070000000000006A3F00000000200005000000000000000400000000002080C807000000000000703F00000000200006000000000000000400000000002080C807000000000000773F0000000000000200000000000000 + - sectname: __objc_imageinfo + segname: __DATA_CONST + addr: 0x1000040C0 + size: 8 + offset: 0x40C0 + align: 2 + 
reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0000000040000000' + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: __DATA + vmaddr: 4295000064 + vmsize: 16384 + fileoff: 32768 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 2 + flags: 0 + Sections: + - sectname: __objc_selrefs + segname: __DATA + addr: 0x100008000 + size: 16 + offset: 0x8000 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x10000005 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 8A3F0000000010007A3F000000001000 + - sectname: __objc_classrefs + segname: __DATA + addr: 0x100008010 + size: 8 + offset: 0x8010 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x10000000 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0500000000000080' + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4295016448 + vmsize: 16384 + fileoff: 49152 + filesize: 1264 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_CHAINED_FIXUPS + cmdsize: 16 + dataoff: 49152 + datasize: 264 + - cmd: LC_DYLD_EXPORTS_TRIE + cmdsize: 16 + dataoff: 49416 + datasize: 48 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 49472 + nsyms: 10 + stroff: 49664 + strsize: 224 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 2 + iextdefsym: 2 + nextdefsym: 2 + iundefsym: 4 + nundefsym: 6 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 49632 + nindirectsyms: 7 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + Content: '/usr/lib/dyld' + ZeroPadBytes: 7 + - cmd: LC_UUID + cmdsize: 24 + uuid: 2CEE52C2-2D9C-3E64-BDD0-C43CCD1B37EC + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 851968 + sdk: 851968 + ntools: 1 + Tools: + - tool: 3 + version: 55836672 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_MAIN + cmdsize: 24 + entryoff: 15952 + stacksize: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 96 + dylib: 
+ name: 24 + timestamp: 2 + current_version: 127992064 + compatibility_version: 19660800 + Content: '/System/Library/Frameworks/Foundation.framework/Versions/C/Foundation' + ZeroPadBytes: 3 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 86441984 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_LOAD_DYLIB + cmdsize: 104 + dylib: + name: 24 + timestamp: 2 + current_version: 127992064 + compatibility_version: 9830400 + Content: '/System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation' + ZeroPadBytes: 3 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 14942208 + compatibility_version: 65536 + Content: '/usr/lib/libobjc.A.dylib' + ZeroPadBytes: 8 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 49464 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 49472 + datasize: 0 + - cmd: LC_CODE_SIGNATURE + cmdsize: 16 + dataoff: 49888 + datasize: 528 +LinkEditData: + NameList: + - n_strx: 156 + n_type: 0x1E + n_sect: 3 + n_desc: 0 + n_value: 4294983456 + - n_strx: 188 + n_type: 0x1E + n_sect: 3 + n_desc: 0 + n_value: 4294983488 + - n_strx: 2 + n_type: 0xF + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 22 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294983248 + - n_strx: 28 + n_type: 0x1 + n_sect: 0 + n_desc: 256 + n_value: 0 + - n_strx: 35 + n_type: 0x1 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 57 + n_type: 0x1 + n_sect: 0 + n_desc: 768 + n_value: 0 + - n_strx: 91 + n_type: 0x1 + n_sect: 0 + n_desc: 1024 + n_value: 0 + - n_strx: 116 + n_type: 0x1 + n_sect: 0 + n_desc: 1024 + n_value: 0 + - n_strx: 142 + n_type: 0x1 + n_sect: 0 + n_desc: 1024 + n_value: 0 + StringTable: + - ' ' + - __mh_execute_header + - _main + - _NSLog + - '_OBJC_CLASS_$_NSArray' + - ___CFConstantStringClassReference + - _objc_autoreleasePoolPop + - _objc_autoreleasePoolPush + - _objc_msgSend + - 
'_objc_msgSend$arrayWithObjects:' + - '_objc_msgSend$objectAtIndex:' + - '' + - '' + - '' + - '' + - '' + - '' + - '' + IndirectSymbols: [ 0x4, 0x7, 0x8, 0x4, 0x7, 0x8, 0x9 ] + FunctionStarts: [ 0x3E50 ] +... diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/main.m b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/main.m new file mode 100644 index 0000000000000..e6745a81333d7 --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/Inputs/application_specific_info/main.m @@ -0,0 +1,13 @@ +#include + +int main(int argc, char *argv[]) { + @autoreleasepool { + + NSArray *crew = [NSArray arrayWithObjects:@"Jim", @"Jason", @"Jonas", @"Ismail", nil]; + + // This will throw an exception. + NSLog(@"%@", [crew objectAtIndex:10]); + } + + return 0; +} diff --git a/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test new file mode 100644 index 0000000000000..266b1b4ee404d --- /dev/null +++ b/lldb/test/Shell/ScriptInterpreter/Python/Crashlog/app_specific_backtrace_crashlog.test @@ -0,0 +1,52 @@ +# REQUIRES: python, native && target-aarch64 && system-darwin + +# RUN: mkdir -p %t.dir +# RUN: yaml2obj %S/Inputs/application_specific_info/asi.yaml > %t.dir/asi +# RUN: %lldb -o 'command script import lldb.macosx.crashlog' \ +# RUN: -o 'crashlog -a -i -t %t.dir/asi %S/Inputs/application_specific_info/asi.ips' \ +# RUN: -o "thread list" -o "bt all" 2>&1 | FileCheck %s + +# CHECK: "crashlog" {{.*}} commands have been installed, use the "--help" options on these commands + +# CHECK: (lldb) process status --verbose +# CHECK-NEXT: Process 96535 stopped +# CHECK-NEXT: * thread #1, queue = 'com.apple.main-thread', stop reason = EXC_CRASH (code=0, subcode=0x0) +# CHECK-NEXT: frame #0: 0x00000001a08c7224{{.*}}[artificial] +# CHECK: Extended Crash Information: +# CHECK: 
Application Specific Information: +# CHECK-NEXT: CoreFoundation: *** Terminating app due to uncaught exception 'NSRangeException', reason: '*** __boundsFail: index 10 beyond bounds [0 .. 3]' +# CHECK-NEXT: libc++abi.dylib: terminating with uncaught exception of type NSException +# CHECK-NEXT: libsystem_c.dylib: abort() called + + +# CHECK: (lldb) thread backtrace --extended true +# CHECK-NEXT: * thread #1, queue = 'com.apple.main-thread', stop reason = EXC_CRASH (code=0, subcode=0x0) +# CHECK-NEXT: * frame #0: 0x00000001a08c7224{{.*}}[artificial] +# CHECK-NEXT: frame #1: 0x00000001a08fdceb{{.*}}[artificial] +# CHECK-NEXT: frame #2: 0x00000001a08372c7{{.*}}[artificial] +# CHECK-NEXT: frame #3: 0x00000001a08b7b17{{.*}}[artificial] +# CHECK-NEXT: frame #4: 0x00000001a08a7a0b{{.*}}[artificial] +# CHECK-NEXT: frame #5: 0x00000001a05ab763{{.*}}[artificial] +# CHECK-NEXT: frame #6: 0x00000001a08b6eb3{{.*}}[artificial] +# CHECK-NEXT: frame #7: 0x00000001a08b9c2b{{.*}}[artificial] +# CHECK-NEXT: frame #8: 0x00000001a08b9bd7{{.*}}[artificial] +# CHECK-NEXT: frame #9: 0x00000001a05a3007{{.*}}[artificial] +# CHECK-NEXT: frame #10: 0x00000001a0b3dcc3{{.*}}[artificial] +# CHECK-NEXT: frame #11: 0x00000001a0b46af3{{.*}}[artificial] +# CHECK-NEXT: frame #12: 0x00000001a09a12a3{{.*}}[artificial] +# CHECK-NEXT: frame #13: 0x00000001047e3ecf asi`main{{.*}}[artificial] +# CHECK-NEXT: frame #14: 0x00000001a05d3e4f{{.*}}[artificial] + +# CHECK: thread #4294967295: tid = 0x0001, 0x00000001a0a58418{{.*}}, queue = 'Application Specific Backtrace' +# CHECK-NEXT: frame #0: 0x00000001a0a58418{{.*}} +# CHECK-NEXT: frame #1: 0x00000001a05a2ea7{{.*}} +# CHECK-NEXT: frame #2: 0x00000001a0b3dcc3{{.*}} +# CHECK-NEXT: frame #3: 0x00000001a0b46af3{{.*}} +# CHECK-NEXT: frame #4: 0x00000001a09a12a3{{.*}} +# CHECK-NEXT: frame #5: 0x00000001047e3ecf asi`main{{.*}} +# CHECK-NEXT: frame #6: 0x00000001a05d3e4f dyld`start{{.*}} + + +# CHECK: (lldb) thread list +# CHECK-NEXT: Process 96535 stopped +# 
CHECK-NEXT: * thread #1: tid = 0x1af8f3, 0x00000001a08c7224{{.*}}, queue = 'com.apple.main-thread', stop reason = EXC_CRASH (code=0, subcode=0x0) diff --git a/lldb/test/Shell/Settings/Inputs/names.cpp b/lldb/test/Shell/Settings/Inputs/names.cpp index 461c6d091a0f4..cf6982abb8f35 100644 --- a/lldb/test/Shell/Settings/Inputs/names.cpp +++ b/lldb/test/Shell/Settings/Inputs/names.cpp @@ -1,5 +1,3 @@ -#include - namespace detail { template struct Quux {}; } // namespace detail @@ -7,15 +5,16 @@ template struct Quux {}; using FuncPtr = detail::Quux (*(*)(int))(float); struct Foo { - template void foo(T const &t) const noexcept(true) {} + template void foo(T arg) const noexcept(true) {} - template void operator<<(size_t) {} + template void operator<<(int) {} template FuncPtr returns_func_ptr(detail::Quux &&) const noexcept(false) { return nullptr; } }; namespace ns { -template int foo(T const &t) noexcept(false) { return 0; } +template int foo(char const *str) noexcept(false) { return 0; } +template int foo(T t) { return 1; } template FuncPtr returns_func_ptr(detail::Quux &&) { return nullptr; } } // namespace ns @@ -24,20 +23,20 @@ int bar() { return 1; } namespace { int anon_bar() { return 1; } -auto anon_lambda = [](std::function) mutable {}; +auto anon_lambda = [] {}; } // namespace int main() { - ns::foo(bar); - ns::foo(std::function{bar}); + ns::foo(bar); + ns::foo("bar"); ns::foo(anon_lambda); - ns::foo(std::function{anon_bar}); - ns::foo(&Foo::foo>); + ns::foo(anon_bar); + ns::foo)>("method"); ns::returns_func_ptr(detail::Quux{}); Foo f; - f.foo(std::function{bar}); - f.foo(std::function{anon_bar}); + f.foo(anon_bar); f.operator<< <(2 > 1)>(0); f.returns_func_ptr(detail::Quux{}); + return 0; } diff --git a/lldb/test/Shell/Settings/TestFrameFormatNameWithArgs.test b/lldb/test/Shell/Settings/TestFrameFormatNameWithArgs.test index d990114f57845..dc4dedadee80a 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatNameWithArgs.test +++ 
b/lldb/test/Shell/Settings/TestFrameFormatNameWithArgs.test @@ -1,4 +1,4 @@ -# REQUIRES: system-darwin +# UNSUPPORTED: system-windows # RUN: %clangxx_host -g -O0 %S/Inputs/names.cpp -std=c++17 -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s settings set -f frame-format "frame ${function.name-with-args}\n" @@ -8,21 +8,19 @@ break set -n returns_func_ptr run # CHECK: frame int ns::foo(t={{.*}}) c -# CHECK: frame int ns::foo>(t= Function = bar() ) +# CHECK: frame int ns::foo(str="bar") c -# CHECK: frame int ns::foo<(anonymous namespace)::$_0>(t={{.*}}) +# CHECK: frame int ns::foo<(anonymous namespace)::$_0>(t=(anonymous namespace)::(unnamed class) @ {{.*}}) c -# CHECK: frame int ns::foo>(t= Function = (anonymous namespace)::anon_bar() ) +# CHECK: frame int ns::foo(t=({{.*}}`(anonymous namespace)::anon_bar() at {{.*}})) c -# CHECK: frame int ns::foo const&) const noexcept>(t={{.*}}) +# CHECK: frame int ns::foo(str="method") c # CHECK: frame ns::returns_func_ptr((null)={{.*}}) c -# CHECK: frame void Foo::foo>(this={{.*}}, t= Function = bar() ) const +# CHECK: frame void Foo::foo(this={{.*}}, arg=({{.*}}`(anonymous namespace)::anon_bar() at {{.*}})) c -# CHECK: frame void Foo::foo>(this={{.*}}, t= Function = (anonymous namespace)::anon_bar() ) const -c -# CHECK: frame void Foo::operator<<<1ul>(this={{.*}}, (null)=0) +# CHECK: frame void Foo::operator<<<1>(this={{.*}}, (null)=0) c # CHECK: frame Foo::returns_func_ptr(this={{.*}}, (null)={{.*}}) q diff --git a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test index cf82f8b493568..e8319341084af 100644 --- a/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test +++ b/lldb/test/Shell/SymbolFile/NativePDB/inline_sites.test @@ -56,7 +56,7 @@ # https://github.com/llvm/llvm-project/issues/53575. Fix them after resolving # the issue. 
-# CEHCK-LABEL: (lldb) image lookup -a 0x140001003 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001003 -v # CHECK: Summary: {{.*}}`main + 3 at a.cpp:2 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) # CHECK: Blocks: id = {{.*}}, range = [0x140001000-0x140001046) @@ -64,7 +64,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argc", type = "int", valid ranges = , location = [0x0000000140001000, 0x000000014000102d) -> DW_OP_reg26 XMM9 # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX -# CEHCK-LABEL: (lldb) image lookup -a 0x140001004 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001004 -v # CHECK: Summary: {{.*}}`main + 4 [inlined] Namespace1::foo at a.h:5 # CHECK-NEXT: {{.*}}`main + 4 at a.cpp:3 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) @@ -77,7 +77,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x140001010 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001010 -v # CHECK: Summary: {{.*}}`main + 16 [inlined] Namespace1::foo + 12 at a.h:7 # CHECK-NEXT: {{.*}}`main + 4 at a.cpp:3 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) @@ -90,7 +90,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 
0x14000101c -v +# CHECK-LABEL: (lldb) image lookup -a 0x14000101c -v # CHECK: Summary: {{.*}}`main + 28 [inlined] Class1::bar at b.h:5 # CHECK-NEXT: {{.*}}`main + 28 [inlined] Namespace1::foo + 24 at a.h:9 # CHECK-NEXT: {{.*}}`main + 4 at a.cpp:3 @@ -107,7 +107,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x14000102a -v +# CHECK-LABEL: (lldb) image lookup -a 0x14000102a -v # CHECK: Summary: {{.*}}`main + 42 [inlined] Namespace2::Class2::func at c.h:5 # CHECK-NEXT: {{.*}}`main + 42 [inlined] Class1::bar + 14 at b.h:7 # CHECK-NEXT: {{.*}}`main + 28 [inlined] Namespace1::foo + 24 at a.h:9 @@ -127,7 +127,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x140001039 -v +# CHECK-LABEL: (lldb) image lookup -a 0x140001039 -v # CHECK: Summary: {{.*}}`main + 57 at a.cpp:3 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) # CHECK: Blocks: id = {{.*}}, range = [0x140001000-0x140001046) @@ -135,7 +135,7 @@ # CHECK-NEXT: Variable: id = {{.*}}, name = "argv", type = "char **", valid ranges = , location = [0x0000000140001000, 0x0000000140001045) -> DW_OP_reg3 RBX # CHECK-NEXT: Variable: id = {{.*}}, name = "main_local", type = "int", valid ranges = , location = [0x0000000140001004, 0x0000000140001046) -> DW_OP_breg7 RSP+48 -# CEHCK-LABEL: (lldb) image lookup -a 0x140001044 -v +# CHECK-LABEL: (lldb) image lookup -a 
0x140001044 -v # CHECK: Summary: {{.*}}`main + 68 [inlined] Namespace1::foo + 5 at a.h:8 # CHECK-NEXT: {{.*}}`main + 63 at a.cpp:3 # CHECK: Function: id = {{.*}}, name = "main", range = [0x0000000140001000-0x0000000140001046) diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt index f636e387bf1f0..c6e7e8cf49e85 100644 --- a/lldb/tools/debugserver/source/CMakeLists.txt +++ b/lldb/tools/debugserver/source/CMakeLists.txt @@ -95,7 +95,7 @@ check_c_source_compiles( #else #error Not building for ARM64 #endif - int main() { return 0; } + int main(void) { return 0; } " BUILDING_FOR_ARM64_OSX ) diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index 35a064fc14bd8..4251eb0aecda9 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -8,6 +8,7 @@ #include "lldb/Expression/DWARFExpression.h" #include "Plugins/Platform/Linux/PlatformLinux.h" +#include "Plugins/SymbolFile/DWARF/DWARFDebugInfo.h" #include "Plugins/TypeSystem/Clang/TypeSystemClang.h" #include "TestingSupport/Symbol/YAMLModuleTester.h" #include "lldb/Core/Debugger.h" @@ -401,3 +402,115 @@ TEST_F(DWARFExpressionMockProcessTest, DW_OP_deref) { Evaluate({DW_OP_lit4, DW_OP_deref, DW_OP_stack_value}, {}, {}, &exe_ctx), llvm::HasValue(GetScalar(32, 0x07060504, false))); } + +TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr) { + // Set up a wasm target + ArchSpec arch("wasm32-unknown-unknown-wasm"); + lldb::PlatformSP host_platform_sp = + platform_linux::PlatformLinux::CreateInstance(true, &arch); + ASSERT_TRUE(host_platform_sp); + Platform::SetHostPlatform(host_platform_sp); + lldb::DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + lldb::TargetSP target_sp; + lldb::PlatformSP platform_sp; + debugger_sp->GetTargetList().CreateTarget(*debugger_sp, "", arch, + lldb_private::eLoadDependentsNo, + 
platform_sp, target_sp); + + ExecutionContext exe_ctx(target_sp, false); + // DW_OP_addr takes a single operand of address size width: + uint8_t expr[] = {DW_OP_addr, 0x40, 0x0, 0x0, 0x0}; + DataExtractor extractor(expr, sizeof(expr), lldb::eByteOrderLittle, + /*addr_size*/ 4); + Value result; + Status status; + ASSERT_TRUE(DWARFExpression::Evaluate( + &exe_ctx, /*reg_ctx*/ nullptr, /*module_sp*/ {}, extractor, + /*unit*/ nullptr, lldb::eRegisterKindLLDB, + /*initial_value_ptr*/ nullptr, + /*object_address_ptr*/ nullptr, result, &status)) + << status.ToError(); + + ASSERT_EQ(result.GetValueType(), Value::ValueType::LoadAddress); +} + +TEST_F(DWARFExpressionMockProcessTest, WASM_DW_OP_addr_index) { + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_386 +DWARF: + debug_abbrev: + - Table: + - Code: 0x00000001 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_addr_base + Form: DW_FORM_sec_offset + + debug_info: + - Version: 5 + AddrSize: 4 + UnitType: DW_UT_compile + Entries: + - AbbrCode: 0x00000001 + Values: + - Value: 0x8 # Offset of the first Address past the header + - AbbrCode: 0x0 + + debug_addr: + - Version: 5 + AddressSize: 4 + Entries: + - Address: 0x1234 + - Address: 0x5678 +)"; + + // Can't use DWARFExpressionTester from above because subsystems overlap with + // the fixture. 
+ SubsystemRAII subsystems; + llvm::Expected file = TestFile::fromYaml(yamldata); + EXPECT_THAT_EXPECTED(file, llvm::Succeeded()); + auto module_sp = std::make_shared(file->moduleSpec()); + auto *dwarf_cu = llvm::cast(module_sp->GetSymbolFile()) + ->DebugInfo() + .GetUnitAtIndex(0); + ASSERT_TRUE(dwarf_cu); + dwarf_cu->ExtractDIEsIfNeeded(); + + // Set up a wasm target + ArchSpec arch("wasm32-unknown-unknown-wasm"); + lldb::PlatformSP host_platform_sp = + platform_linux::PlatformLinux::CreateInstance(true, &arch); + ASSERT_TRUE(host_platform_sp); + Platform::SetHostPlatform(host_platform_sp); + lldb::DebuggerSP debugger_sp = Debugger::CreateInstance(); + ASSERT_TRUE(debugger_sp); + lldb::TargetSP target_sp; + lldb::PlatformSP platform_sp; + debugger_sp->GetTargetList().CreateTarget(*debugger_sp, "", arch, + lldb_private::eLoadDependentsNo, + platform_sp, target_sp); + + ExecutionContext exe_ctx(target_sp, false); + // DW_OP_addrx takes a single leb128 operand, the index in the addr table: + uint8_t expr[] = {DW_OP_addrx, 0x01}; + DataExtractor extractor(expr, sizeof(expr), lldb::eByteOrderLittle, + /*addr_size*/ 4); + Value result; + Status status; + ASSERT_TRUE(DWARFExpression::Evaluate( + &exe_ctx, /*reg_ctx*/ nullptr, /*module_sp*/ {}, extractor, dwarf_cu, + lldb::eRegisterKindLLDB, + /*initial_value_ptr*/ nullptr, + /*object_address_ptr*/ nullptr, result, &status)) + << status.ToError(); + + ASSERT_EQ(result.GetValueType(), Value::ValueType::LoadAddress); + ASSERT_EQ(result.GetScalar().UInt(), 0x5678u); +} diff --git a/lldb/unittests/Utility/CMakeLists.txt b/lldb/unittests/Utility/CMakeLists.txt index d697464600ff5..848a36215aa67 100644 --- a/lldb/unittests/Utility/CMakeLists.txt +++ b/lldb/unittests/Utility/CMakeLists.txt @@ -54,6 +54,10 @@ add_lldb_unittest(UtilityTests Support ) -add_unittest_inputs(UtilityTests +set(test_inputs StructuredData-basic.json + StructuredData-nested.json + StructuredData-full.json ) + +add_unittest_inputs(UtilityTests 
"${test_inputs}") diff --git a/lldb/unittests/Utility/Inputs/StructuredData-full.json b/lldb/unittests/Utility/Inputs/StructuredData-full.json new file mode 100644 index 0000000000000..4e4945cd6a280 --- /dev/null +++ b/lldb/unittests/Utility/Inputs/StructuredData-full.json @@ -0,0 +1,15 @@ +{ + "Array": [ + 3.14, + { + "key": "val" + } + ], + "Dictionary": { + "FalseBool": false + }, + "Integer": 1, + "Null": null, + "String": "value", + "TrueBool": true +} diff --git a/lldb/unittests/Utility/Inputs/StructuredData-nested.json b/lldb/unittests/Utility/Inputs/StructuredData-nested.json new file mode 100644 index 0000000000000..facf461bb6c1f --- /dev/null +++ b/lldb/unittests/Utility/Inputs/StructuredData-nested.json @@ -0,0 +1,14 @@ +{ + "my_dict": [ + { + "three": 3, + "two": 2 + }, + { + "four": { + "val": 4 + } + }, + 1 + ] +} diff --git a/lldb/unittests/Utility/StructuredDataTest.cpp b/lldb/unittests/Utility/StructuredDataTest.cpp index cb5e418cd958e..e536039f365a4 100644 --- a/lldb/unittests/Utility/StructuredDataTest.cpp +++ b/lldb/unittests/Utility/StructuredDataTest.cpp @@ -31,6 +31,73 @@ TEST(StructuredDataTest, StringDump) { } } +TEST(StructuredDataTest, GetDescriptionEmpty) { + Status status; + auto object_sp = StructuredData::ParseJSON("{}"); + ASSERT_NE(nullptr, object_sp); + + StreamString S; + object_sp->GetDescription(S); + EXPECT_EQ(0u, S.GetSize()); +} + +TEST(StructuredDataTest, GetDescriptionBasic) { + Status status; + std::string input = GetInputFilePath("StructuredData-basic.json"); + auto object_sp = StructuredData::ParseJSONFromFile(FileSpec(input), status); + ASSERT_NE(nullptr, object_sp); + + const std::string expected = "[0]: 1\n" + "[1]: 2\n" + "[2]: 3"; + + StreamString S; + object_sp->GetDescription(S); + EXPECT_EQ(expected, S.GetString()); +} + +TEST(StructuredDataTest, GetDescriptionNested) { + Status status; + std::string input = GetInputFilePath("StructuredData-nested.json"); + auto object_sp = 
StructuredData::ParseJSONFromFile(FileSpec(input), status); + ASSERT_NE(nullptr, object_sp); + + const std::string expected = "my_dict:\n" + " [0]:\n" + " three: 3\n" + " two: 2\n" + " [1]:\n" + " four:\n" + " val: 4\n" + " [2]: 1"; + + StreamString S; + object_sp->GetDescription(S); + EXPECT_EQ(expected, S.GetString()); +} + +TEST(StructuredDataTest, GetDescriptionFull) { + Status status; + std::string input = GetInputFilePath("StructuredData-full.json"); + auto object_sp = StructuredData::ParseJSONFromFile(FileSpec(input), status); + ASSERT_NE(nullptr, object_sp); + + const std::string expected = "Array:\n" + " [0]: 3.140000\n" + " [1]:\n" + " key: val\n" + "Dictionary:\n" + " FalseBool: False\n" + "Integer: 1\n" + "Null: NULL\n" + "String: value\n" + "TrueBool: True"; + + StreamString S; + object_sp->GetDescription(S); + EXPECT_EQ(expected, S.GetString()); +} + TEST(StructuredDataTest, ParseJSONFromFile) { Status status; auto object_sp = StructuredData::ParseJSONFromFile( diff --git a/llvm-spirv/include/LLVMSPIRVExtensions.inc b/llvm-spirv/include/LLVMSPIRVExtensions.inc index 1caaf7730b644..65b053675f074 100644 --- a/llvm-spirv/include/LLVMSPIRVExtensions.inc +++ b/llvm-spirv/include/LLVMSPIRVExtensions.inc @@ -55,3 +55,4 @@ EXT(SPV_INTEL_non_constant_addrspace_printf) EXT(SPV_INTEL_complex_float_mul_div) EXT(SPV_INTEL_split_barrier) EXT(SPV_INTEL_masked_gather_scatter) +EXT(SPV_INTEL_tensor_float32_conversion) diff --git a/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp index ea963743c536d..ae3611e814097 100644 --- a/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp @@ -944,27 +944,24 @@ void OCLToSPIRVBase::visitCallReadImageWithSampler(CallInst *CI, assert(CI->getCalledFunction() && "Unexpected indirect call"); Function *Func = CI->getCalledFunction(); bool IsRetScalar = !CI->getType()->isVectorTy(); - SmallVector ArgStructTys; - getParameterTypes(CI, ArgStructTys); Type *Ret = CI->getType(); - auto 
*ImageTy = OCLTypeToSPIRVPtr->getAdaptedArgumentType(Func, 0).second; + auto *ImageTy = OCLTypeToSPIRVPtr->getAdaptedArgumentType(Func, 0); if (!ImageTy) - ImageTy = ArgStructTys[0]; - ImageTy = adaptSPIRVImageType(M, ImageTy); - auto *SampledImgStructTy = getSPIRVStructTypeByChangeBaseTypeName( - M, ImageTy, kSPIRVTypeName::Image, kSPIRVTypeName::SampledImg); - auto *SampledImgTy = PointerType::get(SampledImgStructTy, SPIRAS_Global); - Value *SampledImgArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1)}; - auto *SampledImg = addCallInstSPIRV(M, getSPIRVFuncName(OpSampledImage), - SampledImgTy, SampledImgArgs, nullptr, - {ArgStructTys[0], ArgStructTys[1]}, CI, - kSPIRVName::TempSampledImage); + ImageTy = getCallValueType(CI, 0); auto Mutator = mutateCallInst( CI, getSPIRVFuncName(OpImageSampleExplicitLod, std::string(kSPIRVPostfix::ExtDivider) + getPostfixForReturnType(Ret))); - Mutator.replaceArg(0, {SampledImg, SampledImgStructTy}).removeArg(1); + Mutator.mapArg(0, [&](IRBuilder<> &Builder, Value *ImgArg, Type *ImgType) { + auto *SampledImgTy = adjustImageType(ImageTy, kSPIRVTypeName::Image, + kSPIRVTypeName::SampledImg); + Value *SampledImgArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1)}; + return addSPIRVCallPair(Builder, OpSampledImage, SampledImgTy, + SampledImgArgs, {ImgType, Mutator.getType(1)}, + kSPIRVName::TempSampledImage); + }); + Mutator.removeArg(1); unsigned ImgOpMask = getImageSignZeroExt(DemangledName); unsigned ImgOpMaskInsIndex = Mutator.arg_size(); switch (Mutator.arg_size()) { @@ -997,15 +994,7 @@ void OCLToSPIRVBase::visitCallReadImageWithSampler(CallInst *CI, void OCLToSPIRVBase::visitCallGetImageSize(CallInst *CI, StringRef DemangledName) { - StringRef TyName; - SmallVector SubStrs; - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - auto IsImg = isOCLImageStructType(ParamTys[0], &TyName); - (void)IsImg; - assert(IsImg); - std::string ImageTyName = getImageBaseTypeName(TyName); - auto Desc = map(ImageTyName); + auto Desc = 
getImageDescriptor(getCallValueType(CI, 0)); unsigned Dim = getImageDimension(Desc.Dim) + Desc.Arrayed; assert(Dim > 0 && "Invalid image dimension."); assert(CI->arg_size() == 1); @@ -1131,8 +1120,10 @@ void OCLToSPIRVBase::visitCallToAddr(CallInst *CI, StringRef DemangledName) { Mutator .mapArg(Mutator.arg_size() - 1, [&](Value *V) { - return std::pair( - castToInt8Ptr(V, CI), Type::getInt8Ty(V->getContext())); + return std::make_pair( + castToInt8Ptr(V, CI), + TypedPointerType::get(Type::getInt8Ty(V->getContext()), + SPIRAS_Generic)); }) .appendArg(StorageClass); }; @@ -1497,9 +1488,7 @@ void OCLToSPIRVBase::processSubgroupBlockReadWriteINTEL( // reads and vector block reads. void OCLToSPIRVBase::visitSubgroupBlockReadINTEL(CallInst *CI) { OCLBuiltinTransInfo Info; - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - if (isOCLImageStructType(ParamTys[0])) + if (isOCLImageType(getCallValueType(CI, 0))) Info.UniqName = getSPIRVFuncName(spv::OpSubgroupImageBlockReadINTEL); else Info.UniqName = getSPIRVFuncName(spv::OpSubgroupBlockReadINTEL); @@ -1512,9 +1501,7 @@ void OCLToSPIRVBase::visitSubgroupBlockReadINTEL(CallInst *CI) { // instructions. 
void OCLToSPIRVBase::visitSubgroupBlockWriteINTEL(CallInst *CI) { OCLBuiltinTransInfo Info; - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - if (isOCLImageStructType(ParamTys[0])) + if (isOCLImageType(getCallValueType(CI, 0))) Info.UniqName = getSPIRVFuncName(spv::OpSubgroupImageBlockWriteINTEL); else Info.UniqName = getSPIRVFuncName(spv::OpSubgroupBlockWriteINTEL); @@ -1614,7 +1601,7 @@ void OCLToSPIRVBase::visitSubgroupAVCWrapperBuiltinCall( std::string MCETName = std::string(kOCLSubgroupsAVCIntel::TypePrefix) + "mce_" + TyKind + "_t"; auto *MCESTy = getSubgroupAVCIntelMCEType(M, MCETName); - auto *MCETy = PointerType::get(MCESTy, SPIRAS_Private); + auto *MCETy = TypedPointerType::get(MCESTy, SPIRAS_Private); std::string ToMCEFName = Prefix + OpKind + "_convert_to_mce_" + TyKind; Op ToMCEOC = OpNop; OCLSPIRVSubgroupAVCIntelBuiltinMap::find(ToMCEFName, &ToMCEOC); @@ -1631,28 +1618,24 @@ void OCLToSPIRVBase::visitSubgroupAVCWrapperBuiltinCall( mutateCallInst(CI, WrappedOC) .mapArg(CI->arg_size() - 1, - [&](Value *Arg, Type *ParamTy) { + [&](IRBuilder<> &Builder, Value *Arg, Type *ParamTy) { // Create conversion function call for the last operand - return std::pair( - addCallInstSPIRV(M, getSPIRVFuncName(ToMCEOC), MCETy, Arg, - nullptr, {ParamTy}, CI, ""), - MCESTy); + return addSPIRVCallPair(Builder, ToMCEOC, MCETy, {Arg}, + {ParamTy}); }) - .changeReturnType(MCETy, [=](IRBuilder<> &, CallInst *NewCI) { + .changeReturnType(MCETy, [&](IRBuilder<> &Builder, CallInst *NewCI) { // Create conversion function call for the return result - return addCallInstSPIRV(M, getSPIRVFuncName(FromMCEOC), CI->getType(), - NewCI, nullptr, {MCESTy}, CI, ""); + return addSPIRVCall(Builder, FromMCEOC, CI->getType(), {NewCI}, + {MCETy}); }); } else { // Wrapper built-ins which take the 'result_t' argument requires only one // conversion for the argument mutateCallInst(CI, WrappedOC) - .mapArg(CI->arg_size() - 1, [&](Value *Arg, Type *ParamTy) { + .mapArg(CI->arg_size() - 1, 
[&](IRBuilder<> &Builder, Value *Arg, + Type *ParamTy) { // Create conversion function call for the last operand - return std::pair( - addCallInstSPIRV(M, getSPIRVFuncName(ToMCEOC), MCETy, Arg, - nullptr, {ParamTy}, CI, ""), - MCESTy); + return addSPIRVCallPair(Builder, ToMCEOC, MCETy, {Arg}, {ParamTy}); }); } } @@ -1676,9 +1659,8 @@ void OCLToSPIRVBase::visitSubgroupAVCBuiltinCallWithSampler( return; // this is not a VME built-in SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - auto *TyIt = - std::find_if(ParamTys.begin(), ParamTys.end(), isSamplerStructTy); + getParameterTypes(CI->getCalledFunction(), ParamTys); + auto *TyIt = std::find_if(ParamTys.begin(), ParamTys.end(), isSamplerTy); assert(TyIt != ParamTys.end() && "Invalid Subgroup AVC Intel built-in call"); unsigned SamplerIndex = TyIt - ParamTys.begin(); Value *SamplerVal = CI->getOperand(SamplerIndex); @@ -1687,30 +1669,24 @@ void OCLToSPIRVBase::visitSubgroupAVCBuiltinCallWithSampler( SmallVector AdaptedTys; for (unsigned I = 0; I < CI->arg_size(); I++) AdaptedTys.push_back( - OCLTypeToSPIRVPtr->getAdaptedArgumentType(CI->getCalledFunction(), I) - .second); + OCLTypeToSPIRVPtr->getAdaptedArgumentType(CI->getCalledFunction(), I)); auto *AdaptedIter = AdaptedTys.begin(); mutateCallInst(CI, OC) - .mapArgs([&](Value *Arg, Type *PointerTy) { - if (!isOCLImageStructType(PointerTy)) - return std::make_pair(Arg, PointerTy); + .mapArgs([&](IRBuilder<> &Builder, Value *Arg, Type *ArgTy) { + if (!isOCLImageType(ArgTy)) + return BuiltinCallMutator::ValueTypePair(Arg, ArgTy); auto *ImageTy = *AdaptedIter++; if (!ImageTy) - ImageTy = PointerTy; - ImageTy = adaptSPIRVImageType(M, ImageTy); - auto *SampledImgStructTy = getSPIRVStructTypeByChangeBaseTypeName( - M, ImageTy, kSPIRVTypeName::Image, kSPIRVTypeName::VmeImageINTEL); - auto *SampledImgTy = - PointerType::get(SampledImgStructTy, SPIRAS_Global); + ImageTy = ArgTy; + auto *SampledImgTy = adjustImageType(ImageTy, kSPIRVTypeName::Image, + 
kSPIRVTypeName::VmeImageINTEL); Value *SampledImgArgs[] = {Arg, SamplerVal}; - return std::pair( - addCallInstSPIRV(M, getSPIRVFuncName(OpVmeImageINTEL), SampledImgTy, - SampledImgArgs, nullptr, {PointerTy, SamplerTy}, - CI, kSPIRVName::TempSampledImage), - SampledImgStructTy); + return addSPIRVCallPair(Builder, OpVmeImageINTEL, SampledImgTy, + SampledImgArgs, {ArgTy, SamplerTy}, + kSPIRVName::TempSampledImage); }) .removeArg(SamplerIndex); } diff --git a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp index ce0d6ac0de8c6..d742944bb2697 100644 --- a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp @@ -106,12 +106,11 @@ bool OCLTypeToSPIRVBase::runOCLTypeToSPIRV(Module &Module) { return false; } -void OCLTypeToSPIRVBase::addAdaptedType(Value *V, Type *Ty, - unsigned AddrSpace) { +void OCLTypeToSPIRVBase::addAdaptedType(Value *V, Type *Ty) { LLVM_DEBUG(dbgs() << "[add adapted type] "; V->printAsOperand(dbgs(), true, M); dbgs() << " => " << *Ty << '\n'); - AdaptedTy[V] = {Ty, AddrSpace}; + AdaptedTy[V] = Ty; } void OCLTypeToSPIRVBase::addWork(Function *F) { @@ -133,17 +132,16 @@ void OCLTypeToSPIRVBase::adaptFunction(Function *F) { auto Loc = AdaptedTy.find(&I); auto Found = (Loc != AdaptedTy.end()); Changed |= Found; - ArgTys.push_back(Found ? Loc->second.first : I.getType()); + ArgTys.push_back(Found ? 
Loc->second : I.getType()); if (Found) { - auto *Ty = Loc->second.first; - unsigned AddrSpace = Loc->second.second; + Type *Ty = Loc->second; for (auto &U : I.uses()) { if (auto *CI = dyn_cast(U.getUser())) { auto ArgIndex = CI->getArgOperandNo(&U); auto CF = CI->getCalledFunction(); if (AdaptedTy.count(CF) == 0) { - addAdaptedType(CF->getArg(ArgIndex), Ty, AddrSpace); + addAdaptedType(CF->getArg(ArgIndex), Ty); addWork(CF); } } @@ -156,7 +154,7 @@ void OCLTypeToSPIRVBase::adaptFunction(Function *F) { auto FT = F->getFunctionType(); FT = FunctionType::get(FT->getReturnType(), ArgTys, FT->isVarArg()); - addAdaptedType(F, FT, 0); + addAdaptedType(F, TypedPointerType::get(FT, 0)); } // Handle functions with sampler arguments that don't get called by @@ -181,7 +179,8 @@ void OCLTypeToSPIRVBase::adaptArgumentsBySamplerUse(Module &M) { AdaptedTy.count(SamplerArg) != 0) // Already traced this, move on. continue; - addAdaptedType(SamplerArg, getSamplerStructType(&M), SPIRAS_Constant); + addAdaptedType(SamplerArg, TypedPointerType::get(getSamplerStructType(&M), + SPIRAS_Constant)); auto Caller = cast(SamplerArg)->getParent(); addWork(Caller); TraceArg(Caller, cast(SamplerArg)->getArgNo()); @@ -209,15 +208,16 @@ void OCLTypeToSPIRVBase::adaptFunctionArguments(Function *F) { bool Changed = false; auto Arg = F->arg_begin(); SmallVector ParamTys; - getParameterTypes(F, ParamTys); // If we couldn't get any information from demangling, there is nothing that // can be done. 
- if (ParamTys.empty()) + if (!getParameterTypes(F, ParamTys)) return; for (unsigned I = 0; I < F->arg_size(); ++I, ++Arg) { - StructType *NewTy = dyn_cast_or_null(ParamTys[I]); + StructType *NewTy = nullptr; + if (auto *TPT = dyn_cast(ParamTys[I])) + NewTy = dyn_cast_or_null(TPT->getElementType()); if (NewTy && NewTy->isOpaque()) { auto STName = NewTy->getStructName(); if (!hasAccessQualifiedName(STName)) @@ -225,10 +225,10 @@ void OCLTypeToSPIRVBase::adaptFunctionArguments(Function *F) { if (STName.startswith(kSPR2TypeName::ImagePrefix)) { auto Ty = STName.str(); auto AccStr = getAccessQualifierFullName(Ty); - addAdaptedType( - &*Arg, - getOrCreateOpaqueStructType(M, mapOCLTypeNameToSPIRV(Ty, AccStr)), - SPIRAS_Global); + addAdaptedType(&*Arg, TypedPointerType::get( + getOrCreateOpaqueStructType( + M, mapOCLTypeNameToSPIRV(Ty, AccStr)), + SPIRAS_Global)); Changed = true; } } @@ -249,7 +249,8 @@ void OCLTypeToSPIRVBase::adaptArgumentsByMetadata(Function *F) { for (unsigned I = 0, E = TypeMD->getNumOperands(); I != E; ++I, ++Arg) { auto OCLTyStr = getMDOperandAsString(TypeMD, I); if (OCLTyStr == OCL_TYPE_NAME_SAMPLER_T) { - addAdaptedType(&(*Arg), getSamplerStructType(M), SPIRAS_Constant); + addAdaptedType(&(*Arg), TypedPointerType::get(getSamplerStructType(M), + SPIRAS_Constant)); Changed = true; } else if (OCLTyStr.startswith("image") && OCLTyStr.endswith("_t")) { auto Ty = (Twine("opencl.") + OCLTyStr).str(); @@ -257,10 +258,10 @@ void OCLTypeToSPIRVBase::adaptArgumentsByMetadata(Function *F) { auto AccMD = F->getMetadata(SPIR_MD_KERNEL_ARG_ACCESS_QUAL); assert(AccMD && "Invalid access qualifier metadata"); auto AccStr = getMDOperandAsString(AccMD, I); - addAdaptedType( - &(*Arg), - getOrCreateOpaqueStructType(M, mapOCLTypeNameToSPIRV(Ty, AccStr)), - SPIRAS_Global); + addAdaptedType(&(*Arg), TypedPointerType::get( + getOrCreateOpaqueStructType( + M, mapOCLTypeNameToSPIRV(Ty, AccStr)), + SPIRAS_Global)); Changed = true; } } @@ -297,15 +298,12 @@ void 
OCLTypeToSPIRVBase::adaptArgumentsByMetadata(Function *F) { // opencl data type x and access qualifier y, and use opencl.image_x.y to // represent image_x type with access qualifier y. // -std::pair -OCLTypeToSPIRVBase::getAdaptedArgumentType(Function *F, unsigned ArgNo) { +Type *OCLTypeToSPIRVBase::getAdaptedArgumentType(Function *F, unsigned ArgNo) { Value *Arg = F->getArg(ArgNo); auto Loc = AdaptedTy.find(Arg); if (Loc == AdaptedTy.end()) - return {nullptr, nullptr}; - Type *PointeeTy = Loc->second.first; - Type *PointerTy = PointerType::get(PointeeTy, Loc->second.second); - return {PointerTy, PointeeTy}; + return nullptr; + return Loc->second; } } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h index 17d3e7dc4a2a3..b0034acfda930 100644 --- a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h +++ b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h @@ -59,25 +59,22 @@ class OCLTypeToSPIRVBase { bool runOCLTypeToSPIRV(llvm::Module &M); - /// Returns the adapted type of the corresponding argument for a function. - /// The first value of the returned pair is the LLVM type of the argument. - /// The second value of the returned pair is the pointer element type of the - /// argument, if the type is a pointer. - std::pair - getAdaptedArgumentType(llvm::Function *F, unsigned ArgNo); + /// Returns the adapted type of the corresponding argument for a function. If + /// the type is a pointer type, it will return a TypedPointerType instead. 
+ llvm::Type *getAdaptedArgumentType(llvm::Function *F, unsigned ArgNo); private: llvm::Module *M; llvm::LLVMContext *Ctx; - // Map of argument/Function -> {pointee type, address space} - std::map> AdaptedTy; + // Map of argument/Function -> adapted type (probably TypedPointerType) + std::map AdaptedTy; std::set WorkSet; // Functions to be adapted void adaptFunctionArguments(llvm::Function *F); void adaptArgumentsByMetadata(llvm::Function *F); void adaptArgumentsBySamplerUse(llvm::Module &M); void adaptFunction(llvm::Function *F); - void addAdaptedType(llvm::Value *V, llvm::Type *PointeeTy, unsigned AS); + void addAdaptedType(llvm::Value *V, llvm::Type *Ty); void addWork(llvm::Function *F); }; diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index d83e36e4a6f5c..5be31a328e036 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -1336,9 +1336,12 @@ Value *unwrapSpecialTypeInitializer(Value *V) { return nullptr; } -bool isSamplerStructTy(Type *Ty) { - auto *STy = dyn_cast_or_null(Ty); - return STy && STy->hasName() && STy->getName() == kSPR2TypeName::Sampler; +bool isSamplerTy(Type *Ty) { + if (auto *TPT = dyn_cast_or_null(Ty)) { + auto *STy = dyn_cast_or_null(TPT->getElementType()); + return STy && STy->hasName() && STy->getName() == kSPR2TypeName::Sampler; + } + return false; } bool isPipeOrAddressSpaceCastBI(const StringRef MangledName) { diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.h b/llvm-spirv/lib/SPIRV/OCLUtil.h index bd4f6dcfc217e..9eb3166571c6a 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.h +++ b/llvm-spirv/lib/SPIRV/OCLUtil.h @@ -499,7 +499,7 @@ bool isEnqueueKernelBI(const StringRef MangledName); bool isKernelQueryBI(const StringRef MangledName); /// Check that the type is the sampler_t -bool isSamplerStructTy(Type *Ty); +bool isSamplerTy(Type *Ty); // Checks if the binary operator is an unfused fmul + fadd instruction. 
bool isUnfusedMulAdd(BinaryOperator *B); diff --git a/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp b/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp index fae86366f40d1..915de2a01aaee 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.cpp @@ -64,9 +64,15 @@ BuiltinCallMutator::BuiltinCallMutator( : CI(CI), FuncName(FuncName), Attrs(CI->getCalledFunction()->getAttributes()), ReturnTy(CI->getType()), Args(CI->args()), Rules(Rules), Builder(CI) { - getParameterTypes(CI->getCalledFunction(), PointerTypes, - std::move(NameMapFn)); - PointerTypes.resize(Args.size(), nullptr); + bool DidDemangle = getParameterTypes(CI->getCalledFunction(), PointerTypes, + std::move(NameMapFn)); + if (!DidDemangle) { + // TODO: PipeBlocking.ll causes demangling failures. + // assert(isNonMangledOCLBuiltin(CI->getCalledFunction()->getName()) && + // "SPIR-V builtin functions should be mangled"); + for (Value *Arg : Args) + PointerTypes.push_back(Arg->getType()); + } } BuiltinCallMutator::BuiltinCallMutator(BuiltinCallMutator &&Other) @@ -84,9 +90,15 @@ Value *BuiltinCallMutator::doConversion() { assert(CI && "Need to have a call instruction to do the conversion"); auto Mangler = makeMangler(CI, Rules); for (unsigned I = 0; I < Args.size(); I++) { - Mangler->getTypeMangleInfo(I).PointerTy = PointerTypes[I]; + Mangler->getTypeMangleInfo(I).PointerTy = + dyn_cast(PointerTypes[I]); } assert(Attrs.getNumAttrSets() <= Args.size() + 2 && "Too many attributes?"); + + // Sanitize the return type, in case it's a TypedPointerType. 
+ if (auto *TPT = dyn_cast(ReturnTy)) + ReturnTy = PointerType::get(TPT->getElementType(), TPT->getAddressSpace()); + CallInst *NewCall = Builder.Insert(addCallInst(CI->getModule(), FuncName, ReturnTy, Args, &Attrs, nullptr, Mangler.get())); @@ -110,7 +122,7 @@ BuiltinCallMutator &BuiltinCallMutator::setArgs(ArrayRef NewArgs) { assert(!Arg->getType()->isPointerTy() && "Cannot use this signature with pointer types"); Args.push_back(Arg); - PointerTypes.emplace_back(); + PointerTypes.push_back(Arg->getType()); } return *this; } @@ -151,23 +163,10 @@ static void moveAttributes(LLVMContext &Ctx, AttributeList &Attrs, Attrs = AttributeList::get(Ctx, NewAttrs); } -// Convert a ValueTypePair to a TypedPointerType for storing in the PointerTypes -// array. -static TypedPointerType *toTPT(BuiltinCallMutator::ValueTypePair Pair) { - if (!Pair.second) - return nullptr; - unsigned AS = 0; - if (auto *TPT = dyn_cast(Pair.first->getType())) - AS = TPT->getAddressSpace(); - else if (isa(Pair.first->getType())) - AS = Pair.first->getType()->getPointerAddressSpace(); - return TypedPointerType::get(Pair.second, AS); -} - BuiltinCallMutator &BuiltinCallMutator::insertArg(unsigned Index, ValueTypePair Arg) { Args.insert(Args.begin() + Index, Arg.first); - PointerTypes.insert(PointerTypes.begin() + Index, toTPT(Arg)); + PointerTypes.insert(PointerTypes.begin() + Index, Arg.second); moveAttributes(CI->getContext(), Attrs, Index, Args.size() - Index, Index + 1); return *this; @@ -176,7 +175,7 @@ BuiltinCallMutator &BuiltinCallMutator::insertArg(unsigned Index, BuiltinCallMutator &BuiltinCallMutator::replaceArg(unsigned Index, ValueTypePair Arg) { Args[Index] = Arg.first; - PointerTypes[Index] = toTPT(Arg); + PointerTypes[Index] = Arg.second; Attrs = Attrs.removeParamAttributes(CI->getContext(), Index); return *this; } @@ -211,3 +210,71 @@ BuiltinCallMutator BuiltinCallHelper::mutateCallInst(CallInst *CI, std::string FuncName) { return BuiltinCallMutator(CI, std::move(FuncName), Rules, 
NameMapFn); } + +Value *BuiltinCallHelper::addSPIRVCall(IRBuilder<> &Builder, spv::Op Opcode, + Type *ReturnTy, ArrayRef Args, + ArrayRef ArgTys, + const Twine &Name) { + // Sanitize the return type, in case it's a TypedPointerType. + if (auto *TPT = dyn_cast(ReturnTy)) + ReturnTy = PointerType::get(TPT->getElementType(), TPT->getAddressSpace()); + + // Copy the types into the mangling info. + BuiltinFuncMangleInfo BtnInfo; + for (unsigned I = 0; I < ArgTys.size(); I++) { + if (Args[I]->getType()->isPointerTy()) { + assert(cast(Args[I]->getType()) + ->isOpaqueOrPointeeTypeMatches( + cast(ArgTys[I])->getElementType())); + BtnInfo.getTypeMangleInfo(I).PointerTy = ArgTys[I]; + } + } + + // Create the function and the call. + auto *F = getOrCreateFunction(M, ReturnTy, getTypes(Args), + getSPIRVFuncName(Opcode), &BtnInfo); + return Builder.CreateCall(F, Args, ReturnTy->isVoidTy() ? "" : Name); +} + +Type *BuiltinCallHelper::adjustImageType(Type *T, StringRef OldImageKind, + StringRef NewImageKind) { + if (auto *TypedPtrTy = dyn_cast(T)) { + Type *StructTy = TypedPtrTy->getElementType(); + // Adapt opencl.* struct type names to spirv.* struct type names. + if (isOCLImageType(T)) { + auto ImageTypeName = StructTy->getStructName(); + StringRef Acc = kAccessQualName::ReadOnly; + if (hasAccessQualifiedName(ImageTypeName)) + Acc = getAccessQualifierFullName(ImageTypeName); + StructTy = getOrCreateOpaqueStructType( + M, mapOCLTypeNameToSPIRV(ImageTypeName, Acc)); + } + + // Change type name (e.g., spirv.Image -> spirv.SampledImg) if necessary. 
+ StringRef Postfixes; + if (isSPIRVStructType(StructTy, OldImageKind, &Postfixes)) + StructTy = getOrCreateOpaqueStructType( + M, getSPIRVTypeName(NewImageKind, Postfixes)); + else { + report_fatal_error("Type did not have expected image kind"); + } + return TypedPointerType::get(StructTy, TypedPtrTy->getAddressSpace()); + } + report_fatal_error("Expected type to be a SPIRV image type"); +} + +BuiltinCallMutator::ValueTypePair +BuiltinCallHelper::getCallValue(CallInst *CI, unsigned ArgNo) { + Function *CalledFunc = CI->getCalledFunction(); + assert(CalledFunc && "Unexpected indirect call"); + if (CalledFunc != CachedFunc) { + CachedFunc = CalledFunc; + [[maybe_unused]] bool DidDemangle = + getParameterTypes(CalledFunc, CachedParameterTypes, NameMapFn); + assert(DidDemangle && "Expected SPIR-V builtins to be properly mangled"); + } + + Value *ParamValue = CI->getArgOperand(ArgNo); + Type *ParamType = CachedParameterTypes[ArgNo]; + return {ParamValue, ParamType}; +} diff --git a/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h b/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h index e5658a0f0db89..90774a6594a48 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h +++ b/llvm-spirv/lib/SPIRV/SPIRVBuiltinHelper.h @@ -80,7 +80,7 @@ class BuiltinCallMutator { // The arguments for the new call instruction. llvm::SmallVector Args; // The pointer element types for the new call instruction. - llvm::SmallVector PointerTypes; + llvm::SmallVector PointerTypes; // The mangler rules to use for the new call instruction. ManglingRules Rules; @@ -116,11 +116,14 @@ class BuiltinCallMutator { /// Get the corresponding argument for the new call. llvm::Value *getArg(unsigned Index) const { return Args[Index]; } + llvm::Type *getType(unsigned Index) const { return PointerTypes[Index]; } + /// Return the pointer element type of the corresponding index, or nullptr if /// it is not a pointer. 
llvm::Type *getPointerElementType(unsigned Index) const { - llvm::TypedPointerType *ElTy = PointerTypes[Index]; - return ElTy ? ElTy->getElementType() : nullptr; + if (auto *TPT = llvm::dyn_cast(PointerTypes[Index])) + return TPT->getElementType(); + return nullptr; } /// A pair representing both the LLVM value of an argument and its @@ -128,7 +131,7 @@ class BuiltinCallMutator { /// implicit conversion from an LLVM value object (but only if it is not of /// pointer type), or by the appropriate std::pair type. struct ValueTypePair : public std::pair { - ValueTypePair(llvm::Value *V) : pair(V, nullptr) { + ValueTypePair(llvm::Value *V) : pair(V, V->getType()) { assert(!V->getType()->isPointerTy() && "Must specify a pointer element type if value is a pointer."); } @@ -181,7 +184,7 @@ class BuiltinCallMutator { BuiltinCallMutator &moveArg(unsigned FromIndex, unsigned ToIndex) { if (FromIndex == ToIndex) return *this; - ValueTypePair Pair(Args[FromIndex], getPointerElementType(FromIndex)); + ValueTypePair Pair(Args[FromIndex], getType(FromIndex)); removeArg(FromIndex); insertArg(ToIndex, Pair); return *this; @@ -200,15 +203,15 @@ class BuiltinCallMutator { /// When present, the IRBuilder parameter corresponds to a builder that is set /// to insert immediately before the new call instruction. The Value parameter /// corresponds to the argument to be mutated. The Type parameter, when - /// present, corresponds to the pointer element type of the argument, or null - /// when it is not present. + /// present, will be either a TypedPointerType representing the "true" type of + /// the value, or the argument's type otherwise. 
template BuiltinCallMutator &mapArg(unsigned Index, FnType Func) { using namespace llvm; using std::is_invocable; IRBuilder<> Builder(CI); Value *V = Args[Index]; - [[maybe_unused]] Type *T = getPointerElementType(Index); + [[maybe_unused]] Type *T = getType(Index); // Dispatch the function call as appropriate, based on the types that the // function may be called with. @@ -272,6 +275,52 @@ class BuiltinCallHelper { /// to the given SPIR-V opcode (whose name is used in the lookup map of /// getSPIRVFuncName). BuiltinCallMutator mutateCallInst(llvm::CallInst *CI, spv::Op Opcode); + + /// Create a call to a SPIR-V builtin function (specified via opcode). + /// The return type and argument types may be TypedPointerType, if the actual + /// LLVM type is a pointer type. + llvm::Value *addSPIRVCall(llvm::IRBuilder<> &Builder, spv::Op Opcode, + llvm::Type *ReturnTy, + llvm::ArrayRef Args, + llvm::ArrayRef ArgTys, + const llvm::Twine &Name = ""); + + /// Create a call to a SPIR-V builtin function, returning a value and type + /// pair suitable for use in BuiltinCallMutator::replaceArg and similar + /// functions. + BuiltinCallMutator::ValueTypePair + addSPIRVCallPair(llvm::IRBuilder<> &Builder, spv::Op Opcode, + llvm::Type *ReturnTy, llvm::ArrayRef Args, + llvm::ArrayRef ArgTys, + const llvm::Twine &Name = "") { + llvm::Value *V = + addSPIRVCall(Builder, Opcode, ReturnTy, Args, ArgTys, Name); + return BuiltinCallMutator::ValueTypePair(V, ReturnTy); + } + + /// Adapt the various SPIR-V image types, for example changing a "spirv.Image" + /// type into a "spirv.SampledImage" type with identical parameters. + /// + /// The input type is expected to be a TypedPointerType to either a + /// "spirv.*" or "opencl.*" struct type. In the case of "opencl.*" struct + /// types, it will first convert it into the corresponding "spirv.Image" + /// struct type. + /// + /// If the image type does not match OldImageKind, this method will abort. 
+ llvm::Type *adjustImageType(llvm::Type *T, llvm::StringRef OldImageKind, + llvm::StringRef NewImageKind); + +private: + llvm::SmallVector CachedParameterTypes; + llvm::Function *CachedFunc = nullptr; + +public: + BuiltinCallMutator::ValueTypePair getCallValue(llvm::CallInst *CI, + unsigned ArgNo); + + llvm::Type *getCallValueType(llvm::CallInst *CI, unsigned ArgNo) { + return getCallValue(CI, ArgNo).second; + } }; } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/SPIRVInternal.h b/llvm-spirv/lib/SPIRV/SPIRVInternal.h index bd41494983659..a97c8745ddade 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVInternal.h +++ b/llvm-spirv/lib/SPIRV/SPIRVInternal.h @@ -236,6 +236,7 @@ inline void SPIRVMap::init() { add(Attribute::NoAlias, FunctionParameterAttributeNoAlias); add(Attribute::NoCapture, FunctionParameterAttributeNoCapture); add(Attribute::ReadOnly, FunctionParameterAttributeNoWrite); + add(Attribute::ReadNone, FunctionParameterAttributeNoReadWrite); } typedef SPIRVMap SPIRSPIRVFuncParamAttrMap; @@ -243,8 +244,6 @@ typedef SPIRVMap template <> inline void SPIRVMap::init() { - add(Attribute::ReadNone, FunctionControlPureMask); - add(Attribute::ReadOnly, FunctionControlConstMask); add(Attribute::AlwaysInline, FunctionControlInlineMask); add(Attribute::NoInline, FunctionControlDontInlineMask); add(Attribute::OptimizeNone, internal::FunctionControlOptNoneINTELMask); @@ -613,12 +612,9 @@ Scope getArgAsScope(CallInst *CI, unsigned I); /// \param I argument index. Decoration getArgAsDecoration(CallInst *CI, unsigned I); -/// Check if a type is SPIRV sampler type. -bool isSPIRVSamplerType(llvm::Type *Ty); - -/// Check if a type is OCL image type (if pointed to). +/// Check if a type is OCL image type. /// \return type name without "opencl." prefix. 
-bool isOCLImageStructType(llvm::Type *Ty, StringRef *Name = nullptr); +bool isOCLImageType(llvm::Type *Ty, StringRef *Name = nullptr); /// \param BaseTyName is the type name as in spirv.BaseTyName.Postfixes /// \param Postfix contains postfixes extracted from the SPIR-V image @@ -862,12 +858,6 @@ std::string getSPIRVTypeName(StringRef BaseTyName, StringRef Postfixes = ""); /// Checks if given type name is either ConstantSampler or ConsantPipeStorage. bool isSPIRVConstantName(StringRef TyName); -/// Get SPIR-V type by changing the type name from spirv.OldName.Postfixes -/// to spirv.NewName.Postfixes. -Type *getSPIRVStructTypeByChangeBaseTypeName(Module *M, Type *T, - StringRef OldName, - StringRef NewName); - /// Get the postfixes of SPIR-V image type name as in spirv.Image.postfixes. std::string getSPIRVImageTypePostfixes(StringRef SampledType, SPIRVTypeImageDescriptor Desc, @@ -877,10 +867,6 @@ std::string getSPIRVImageTypePostfixes(StringRef SampledType, /// friendly LLVM IR. std::string getSPIRVImageSampledTypeName(SPIRVType *Ty); -/// Translates OpenCL image type names to SPIR-V. -/// E.g. %opencl.image1d_rw_t -> %spirv.Image._void_0_0_0_0_0_0_2 -Type *adaptSPIRVImageType(Module *M, Type *PointeeType); - /// Get LLVM type for sampled type of SPIR-V image type by postfix. Type *getLLVMTypeForSPIRVImageSampledTypePostfix(StringRef Postfix, LLVMContext &Ctx); @@ -889,6 +875,9 @@ Type *getLLVMTypeForSPIRVImageSampledTypePostfix(StringRef Postfix, /// E.g. opencl.image2d_ro_t.3 -> image2d_t std::string getImageBaseTypeName(StringRef Name); +/// Extract the image type descriptor from the given image type. +SPIRVTypeImageDescriptor getImageDescriptor(Type *Ty); + /// Map OpenCL opaque type name to SPIR-V type name. 
std::string mapOCLTypeNameToSPIRV(StringRef Name, StringRef Acc = ""); @@ -942,18 +931,16 @@ bool containsUnsignedAtomicType(StringRef Name); std::string mangleBuiltin(StringRef UniqName, ArrayRef ArgTypes, BuiltinFuncMangleInfo *BtnInfo); -/// Extract the pointee types of arguments from a mangled function name. If the -/// corresponding type is not a pointer to a struct type, its value will be a -/// nullptr instead. -void getParameterTypes( +/// Extract the true pointer types, expressed as a TypedPointerType, of +/// arguments from a mangled function name. If the corresponding type is not a +/// pointer type, its value will be the argument's actual type instead. Returns +/// true if the function name was successfully demangled. +bool getParameterTypes( Function *F, SmallVectorImpl &ArgTys, std::function StructNameMapFn = nullptr); -inline void getParameterTypes(CallInst *CI, SmallVectorImpl &ArgTys) { +inline bool getParameterTypes(CallInst *CI, SmallVectorImpl &ArgTys) { return getParameterTypes(CI->getCalledFunction(), ArgTys); } -void getParameterTypes( - Function *F, SmallVectorImpl &ArgTys, - std::function StructNameMapFn = nullptr); /// Mangle a function from OpenCL extended instruction set in SPIR-V friendly IR /// manner @@ -1014,6 +1001,10 @@ bool hasLoopMetadata(const Module *M); // If so, return it's extended opcode in ExtOp. bool isSPIRVOCLExtInst(const CallInst *CI, OCLExtOpKind *ExtOp); +/// Returns true if a function name corresponds to an OpenCL builtin that is not +/// expected to have name mangling. 
+bool isNonMangledOCLBuiltin(StringRef Name); + // check LLVM Intrinsics type(s) for validity bool checkTypeForSPIRVExtendedInstLowering(IntrinsicInst *II, SPIRVModule *BM); diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index a0ff677dfde94..b31af9f60c00b 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -4378,7 +4378,9 @@ Instruction *SPIRVToLLVM::transOCLBuiltinFromExtInst(SPIRVExtInst *BC, if (isFuncNoUnwind()) F->addFnAttr(Attribute::NoUnwind); if (isFuncReadNone(UnmangledName)) - F->addFnAttr(Attribute::ReadNone); + for (llvm::Argument &Arg : F->args()) + if (Arg.getType()->isPointerTy()) + Arg.addAttr(Attribute::ReadNone); } auto Args = transValue(BC->getArgValues(), F, BB); SPIRVDBG(dbgs() << "[transOCLBuiltinFromExtInst] Function: " << *F diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp index ac8eac6fd5cd0..c677ef9fb17be 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp @@ -256,33 +256,14 @@ void SPIRVToOCLBase::visitCastInst(CastInst &Cast) { void SPIRVToOCLBase::visitCallSPIRVImageQuerySize(CallInst *CI) { // Get image type - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - StructType *ImgTy = cast(ParamTys[0]); - assert(ImgTy && ImgTy->isOpaque() && - "image type must be an opaque structure"); - StringRef ImgTyName = ImgTy->getName(); - assert(ImgTyName.startswith("opencl.image") && "not an OCL image type"); - - unsigned ImgDim = 0; - bool ImgArray = false; - - if (ImgTyName.startswith("opencl.image1d")) { - ImgDim = 1; - } else if (ImgTyName.startswith("opencl.image2d")) { - ImgDim = 2; - } else if (ImgTyName.startswith("opencl.image3d")) { - ImgDim = 3; - } - assert(ImgDim != 0 && "unexpected image dimensionality"); - - if (ImgTyName.count("_array_") != 0) { - ImgArray = true; - } + Type *ImgTy = getCallValueType(CI, 0); + auto Desc = getImageDescriptor(ImgTy); + unsigned ImgDim 
= getImageDimension(Desc.Dim); + bool ImgArray = Desc.Arrayed; AttributeList Attributes = CI->getCalledFunction()->getAttributes(); BuiltinFuncMangleInfo Mangle; - Mangle.getTypeMangleInfo(0).PointerTy = TypedPointerType::get(ImgTy, 0); + Mangle.getTypeMangleInfo(0).PointerTy = ImgTy; Type *Int32Ty = Type::getInt32Ty(*Ctx); Instruction *GetImageSize = nullptr; @@ -590,7 +571,8 @@ void SPIRVToOCLBase::visitCallSPIRVPipeBuiltin(CallInst *CI, Op OC) { if (T != NewTy) { P = Builder.CreatePointerBitCastOrAddrSpaceCast(P, NewTy); } - return std::pair(P, Builder.getInt8Ty()); + return std::make_pair( + P, TypedPointerType::get(Builder.getInt8Ty(), SPIRAS_Generic)); }); } } @@ -758,31 +740,25 @@ void SPIRVToOCLBase::visitCallSPIRVImageSampleExplicitLodBuiltIn(CallInst *CI, T = VT->getElementType(); auto Mutator = mutateCallImageOperands(CI, kOCLBuiltinName::SampledReadImage, T, 2); + + CallInst *CallSampledImg = cast(CI->getArgOperand(0)); + auto Img = getCallValue(CallSampledImg, 0); + auto Sampler = getCallValue(CallSampledImg, 1); bool IsDepthImage = false; - Value *Sampler = nullptr; - Type *SamplerTy = nullptr; Mutator.mapArg(0, [&](Value *SampledImg) { - CallInst *CallSampledImg = cast(SampledImg); - SmallVector SampledArgTys; - getParameterTypes(CallSampledImg, SampledArgTys); - Type *ImgTy = SampledArgTys[0]; - SamplerTy = SampledArgTys[1]; - StringRef ImageTypeName; - if (isOCLImageStructType(ImgTy, &ImageTypeName)) + if (isOCLImageType(Img.second, &ImageTypeName)) IsDepthImage = ImageTypeName.contains("_depth_"); - auto Img = CallSampledImg->getArgOperand(0); - Sampler = CallSampledImg->getArgOperand(1); if (CallSampledImg->hasOneUse()) { CallSampledImg->replaceAllUsesWith( UndefValue::get(CallSampledImg->getType())); CallSampledImg->dropAllReferences(); CallSampledImg->eraseFromParent(); } - return std::make_pair(Img, ImgTy); + return Img; }); - Mutator.insertArg(1, {Sampler, SamplerTy}); + Mutator.insertArg(1, Sampler); if (IsDepthImage) 
Mutator.changeReturnType(T, [&](IRBuilder<> &Builder, CallInst *NewCI) { return Builder.CreateInsertElement( @@ -878,14 +854,12 @@ void SPIRVToOCLBase::visitCallSPIRVAvcINTELEvaluateBuiltIn(CallInst *CI, mutateCallInst(CI, OCLSPIRVSubgroupAVCIntelBuiltinMap::rmap(OC)); if (NumImages) { CallInst *SrcImage = cast(Mutator.getArg(0)); - SmallVector SrcImageTys; - getParameterTypes(SrcImage, SrcImageTys); if (NumImages == 1) { // Multi reference opcode - remove src image OpVmeImageINTEL opcode // and replace it with corresponding OpImage and OpSampler arguments size_t SamplerPos = Mutator.arg_size() - 1; - Mutator.replaceArg(0, {SrcImage->getOperand(0), SrcImageTys[0]}); - Mutator.insertArg(SamplerPos, {SrcImage->getOperand(1), SrcImageTys[1]}); + Mutator.replaceArg(0, getCallValue(SrcImage, 0)); + Mutator.insertArg(SamplerPos, getCallValue(SrcImage, 1)); } else { CallInst *FwdRefImage = cast(Mutator.getArg(1)); CallInst *BwdRefImage = @@ -895,17 +869,15 @@ void SPIRVToOCLBase::visitCallSPIRVAvcINTELEvaluateBuiltIn(CallInst *CI, // opcodes and OpSampler Mutator.removeArgs(0, NumImages); // insert source OpImage and OpSampler - Mutator.insertArg(0, {SrcImage->getOperand(0), SrcImageTys[0]}); - Mutator.insertArg(1, {SrcImage->getOperand(1), SrcImageTys[1]}); + Mutator.insertArg(0, getCallValue(SrcImage, 0)); + Mutator.insertArg(1, getCallValue(SrcImage, 1)); // insert reference OpImage - getParameterTypes(FwdRefImage, SrcImageTys); - Mutator.insertArg(1, {FwdRefImage->getOperand(0), SrcImageTys[0]}); + Mutator.insertArg(1, getCallValue(FwdRefImage, 0)); EraseVmeImageCall(SrcImage); EraseVmeImageCall(FwdRefImage); if (BwdRefImage) { // Dual reference opcode - insert second reference OpImage argument - getParameterTypes(BwdRefImage, SrcImageTys); - Mutator.insertArg(2, {BwdRefImage->getOperand(0), SrcImageTys[0]}); + Mutator.insertArg(2, getCallValue(BwdRefImage, 0)); EraseVmeImageCall(BwdRefImage); } } @@ -1129,11 +1101,6 @@ std::string 
SPIRVToOCLBase::translateOpaqueType(StringRef STName) { return OCLOpaqueName; } -void SPIRVToOCLBase::getParameterTypes(CallInst *CI, - SmallVectorImpl &Tys) { - ::getParameterTypes(CI->getCalledFunction(), Tys, translateOpaqueType); -} - void addSPIRVBIsLoweringPass(ModulePassManager &PassMgr, SPIRV::BIsRepresentation BIsRep) { switch (BIsRep) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h index 204cd72e7757e..1b92fc96bd140 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h @@ -280,8 +280,6 @@ class SPIRVToOCLBase : public InstVisitor, static std::string getOCLPipeOpaqueType(SmallVector &Postfixes); - void getParameterTypes(CallInst *CI, SmallVectorImpl &Tys); - static std::string translateOpaqueType(StringRef STName); /// Mutate the call instruction based on (optional) image operands at position diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp index bce8ed588d079..ee645731d69dc 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp @@ -174,17 +174,17 @@ CallInst *SPIRVToOCL20Base::mutateCommonAtomicArguments(CallInst *CI, Op OC) { auto OrderIdx = Ptr + 2; auto Mutator = mutateCallInst(CI, Name); - Mutator.mapArgs([=](Value *PtrArg, Type *PtrElemTy) { - Type *PtrArgTy = PtrArg->getType(); - if (PtrArgTy->isPointerTy()) { - if (PtrArgTy->getPointerAddressSpace() != SPIRAS_Generic) { - Type *FixedPtr = PointerType::getWithSamePointeeType( - cast(PtrArgTy), SPIRAS_Generic); - PtrArg = CastInst::CreatePointerBitCastOrAddrSpaceCast( - PtrArg, FixedPtr, PtrArg->getName() + ".as", CI); + Mutator.mapArgs([=](IRBuilder<> &Builder, Value *PtrArg, Type *PtrArgTy) { + if (auto *TypedPtrTy = dyn_cast(PtrArgTy)) { + if (TypedPtrTy->getAddressSpace() != SPIRAS_Generic) { + Type *ElementTy = TypedPtrTy->getElementType(); + Type *FixedPtr = PointerType::get(ElementTy, SPIRAS_Generic); + PtrArg = 
Builder.CreateAddrSpaceCast(PtrArg, FixedPtr, + PtrArg->getName() + ".as"); + PtrArgTy = TypedPointerType::get(ElementTy, SPIRAS_Generic); } } - return std::make_pair(PtrArg, PtrElemTy); + return std::make_pair(PtrArg, PtrArgTy); }); Mutator.mapArg(ScopeIdx, [=](Value *Arg) { return SPIRV::transSPIRVMemoryScopeIntoOCLMemoryScope(Arg, CI); @@ -224,7 +224,7 @@ void SPIRVToOCL20Base::visitCallSPIRVAtomicCmpExchg(CallInst *CI) { cast(PExpected->getType()), AddrSpc); Value *V = Builder.CreateAddrSpaceCast( PExpected, PtrTyAS, PExpected->getName() + ".as"); - return std::make_pair(V, MemTy); + return std::make_pair(V, TypedPointerType::get(MemTy, AddrSpc)); }) .moveArg(4, 2) .changeReturnType(Type::getInt1Ty(*Ctx), [=](IRBuilder<> &Builder, @@ -264,7 +264,8 @@ void SPIRVToOCL20Base::visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) { Mutator.mapArg(6, [=](IRBuilder<> &Builder, Value *Invoke) { Value *Replace = CastInst::CreatePointerBitCastOrAddrSpaceCast( Invoke, Builder.getInt8PtrTy(SPIRAS_Generic), "", CI); - return std::pair(Replace, Builder.getInt8Ty()); + return std::make_pair( + Replace, TypedPointerType::get(Builder.getInt8Ty(), SPIRAS_Generic)); }); if (!HasVaargs) { diff --git a/llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp b/llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp index ab16374b027d3..c61ecdc22d941 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVTypeScavenger.cpp @@ -229,13 +229,13 @@ void SPIRVTypeScavenger::deduceFunctionType(Function &F) { // If the function is a mangled name, try to recover types from the Itanium // name mangling. 
if (F.getName().startswith("_Z")) { - SmallVector ParameterTypes; - getParameterTypes(&F, ParameterTypes); + SmallVector ParamTypes; + getParameterTypes(&F, ParamTypes); for (Argument *Arg : PointerArgs) { - if (auto *Ty = ParameterTypes[Arg->getArgNo()]) { - DeducedTypes[Arg] = Ty; + if (auto *Ty = dyn_cast(ParamTypes[Arg->getArgNo()])) { + DeducedTypes[Arg] = Ty->getElementType(); LLVM_DEBUG(dbgs() << "Arg " << Arg->getArgNo() << " of " << F.getName() - << " has type " << *Ty << "\n"); + << " has type " << *Ty->getElementType() << "\n"); } } } diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index fd222fbdca2e3..fb5be8af701b7 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -250,19 +250,19 @@ void getFunctionTypeParameterTypes(llvm::FunctionType *FT, bool isVoidFuncTy(FunctionType *FT) { return FT->getReturnType()->isVoidTy(); } -bool isOCLImageStructType(llvm::Type *Ty, StringRef *Name) { - if (auto *ST = dyn_cast_or_null(Ty)) - if (ST->isOpaque()) { - auto FullName = ST->getName(); - if (FullName.find(kSPR2TypeName::ImagePrefix) == 0) { - if (Name) - *Name = FullName.drop_front(strlen(kSPR2TypeName::OCLPrefix)); - return true; +bool isOCLImageType(llvm::Type *Ty, StringRef *Name) { + if (auto *TPT = dyn_cast_or_null(Ty)) + if (auto *ST = dyn_cast_or_null(TPT->getElementType())) + if (ST->isOpaque()) { + auto FullName = ST->getName(); + if (FullName.find(kSPR2TypeName::ImagePrefix) == 0) { + if (Name) + *Name = FullName.drop_front(strlen(kSPR2TypeName::OCLPrefix)); + return true; + } } - } return false; } - /// \param BaseTyName is the type Name as in spirv.BaseTyName.Postfixes /// \param Postfix contains postfixes extracted from the SPIR-V image /// type Name as spirv.BaseTyName.Postfixes. 
@@ -722,15 +722,6 @@ static StringRef stringify(const itanium_demangle::NameType *Node) { return StringRef(Str.begin(), Str.size()); } -void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, - std::function NameMapFn) { - SmallVector PIPs; - getParameterTypes(F, PIPs, std::move(NameMapFn)); - for (auto *Pair : PIPs) { - ArgTys.push_back(Pair ? Pair->getElementType() : nullptr); - } -} - template static TypedPointerType * parseNode(Module *M, const llvm::itanium_demangle::Node *ParamType, @@ -838,14 +829,14 @@ parseNode(Module *M, const llvm::itanium_demangle::Node *ParamType, return PointeeTy ? TypedPointerType::get(PointeeTy, AS) : nullptr; } -void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, +bool getParameterTypes(Function *F, SmallVectorImpl &ArgTys, std::function NameMapFn) { using namespace llvm::itanium_demangle; // If there's no mangled name, we can't do anything. Also, if there's no // parameters, do nothing. StringRef Name = F->getName(); if (!Name.startswith("_Z") || F->arg_empty()) - return; + return Name.startswith("_Z"); Module *M = F->getParent(); auto GetStructType = [&](StringRef Name) { @@ -860,7 +851,7 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, bool HasSret = false; for (Argument &Arg : F->args()) { if (!Arg.getType()->isPointerTy()) - ArgTys.push_back(nullptr); + ArgTys.push_back(Arg.getType()); else if (Type *Ty = Arg.getParamStructRetType()) { assert(!HasSret && &Arg == F->getArg(0) && "sret parameter should only appear on the first argument"); @@ -871,7 +862,7 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, else ArgTys.push_back(TypedPointerType::get(Ty, 0)); } else { - ArgTys.push_back(nullptr); + ArgTys.push_back(Arg.getType()); } } @@ -894,7 +885,7 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, // name encoding, bail out. auto *RootNode = dyn_cast_or_null(Demangler.parse()); if (!RootNode) - return; + return false; // Get the parameter list. 
If the function is a vararg function, drop the last // parameter. @@ -912,7 +903,7 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, } else { LLVM_DEBUG(dbgs() << "[getParameterTypes] function " << MangledName << " was expected to have a varargs parameter\n"); - return; + return false; } } @@ -923,21 +914,26 @@ void getParameterTypes(Function *F, SmallVectorImpl &ArgTys, << " appears to have " << Params.size() << " arguments but has " << (ArgTys.end() - ArgIter) << "\n"); - return; + return false; } + // Overwrite the types of pointer-typed arguments with information from + // demangling. + bool DemangledSuccessfully = true; for (auto *ParamType : Params) { - Type *ArgTy = F->getArg(ArgIter - ArgTys.begin())->getType(); - TypedPointerType *PointeeTy = parseNode(M, ParamType, GetStructType); - if (ArgTy->isPointerTy() && PointeeTy == nullptr) { - PointeeTy = TypedPointerType::get(Type::getInt8Ty(ArgTy->getContext()), - ArgTy->getPointerAddressSpace()); - LLVM_DEBUG(dbgs() << "Failed to recover type of argument " - << (ArgIter - ArgTys.begin()) << " of function " - << F->getName() << "\n"); - } - *ArgIter++ = PointeeTy; + Type *ArgTy = *ArgIter; + Type *DemangledTy = parseNode(M, ParamType, GetStructType); + if (ArgTy->isPointerTy() && DemangledTy == nullptr) { + DemangledTy = TypedPointerType::get(Type::getInt8Ty(ArgTy->getContext()), + ArgTy->getPointerAddressSpace()); + LLVM_DEBUG(dbgs() << "Failed to recover type of argument " << *ArgTy + << " of function " << F->getName() << "\n"); + DemangledSuccessfully = false; + } else if (!DemangledTy) + DemangledTy = ArgTy; + *ArgIter++ = DemangledTy; } + return DemangledSuccessfully; } CallInst *mutateCallInst( @@ -1553,17 +1549,6 @@ bool isSPIRVConstantName(StringRef TyName) { return false; } -Type *getSPIRVStructTypeByChangeBaseTypeName(Module *M, Type *T, - StringRef OldName, - StringRef NewName) { - StringRef Postfixes; - if (isSPIRVStructType(T, OldName, &Postfixes)) - return 
getOrCreateOpaqueStructType(M, getSPIRVTypeName(NewName, Postfixes)); - LLVM_DEBUG(dbgs() << " Invalid SPIR-V type " << *T << '\n'); - llvm_unreachable("Invalid SPIR-V type"); - return nullptr; -} - std::string getSPIRVImageTypePostfixes(StringRef SampledType, SPIRVTypeImageDescriptor Desc, SPIRVAccessQualifierKind Acc) { @@ -1665,6 +1650,13 @@ std::string mapOCLTypeNameToSPIRV(StringRef Name, StringRef Acc) { return getSPIRVTypeName(BaseTy, OS.str()); } +SPIRVTypeImageDescriptor getImageDescriptor(Type *Ty) { + StringRef TyName; + [[maybe_unused]] bool IsImg = isOCLImageType(Ty, &TyName); + assert(IsImg && "Must be an image type"); + return map(getImageBaseTypeName(TyName)); +} + bool eraseIfNoUse(Function *F) { bool Changed = false; if (!F) @@ -1825,19 +1817,6 @@ StringRef getAccessQualifierFullName(StringRef TyName) { .Case(kAccessQualPostfix::ReadWrite, kAccessQualName::ReadWrite); } -/// Translates OpenCL image type names to SPIR-V. -Type *adaptSPIRVImageType(Module *M, Type *PointeeType) { - if (isOCLImageStructType(PointeeType)) { - auto ImageTypeName = PointeeType->getStructName(); - StringRef Acc = kAccessQualName::ReadOnly; - if (hasAccessQualifiedName(ImageTypeName)) - Acc = getAccessQualifierFullName(ImageTypeName); - return getOrCreateOpaqueStructType( - M, mapOCLTypeNameToSPIRV(ImageTypeName, Acc)); - } - return PointeeType; -} - llvm::PointerType *getOCLClkEventType(Module *M) { return getOrCreateOpaquePtrType(M, SPIR_TYPE_NAME_CLK_EVENT_T, SPIRAS_Private); @@ -2065,8 +2044,10 @@ bool lowerBuiltinVariableToCall(GlobalVariable *GV, Func = Function::Create(FT, GlobalValue::ExternalLinkage, MangledName, M); Func->setCallingConv(CallingConv::SPIR_FUNC); Func->addFnAttr(Attribute::NoUnwind); - Func->addFnAttr(Attribute::ReadNone); Func->addFnAttr(Attribute::WillReturn); + for (llvm::Argument &Arg : Func->args()) + if (Arg.getType()->isPointerTy()) + Arg.addAttr(Attribute::ReadNone); } // Collect instructions in these containers to remove them later. 
diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 8d3245ec8bffa..3dacdf8e4f238 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -162,7 +162,8 @@ static void translateSEVDecoration(Attribute Sev, SPIRVValue *Val) { } LLVMToSPIRVBase::LLVMToSPIRVBase(SPIRVModule *SMod) - : M(nullptr), Ctx(nullptr), BM(SMod), SrcLang(0), SrcLangVer(0) { + : BuiltinCallHelper(ManglingRules::None), M(nullptr), Ctx(nullptr), + BM(SMod), SrcLang(0), SrcLangVer(0) { DbgTran = std::make_unique(nullptr, SMod, this); } @@ -173,6 +174,7 @@ LLVMToSPIRVBase::~LLVMToSPIRVBase() { bool LLVMToSPIRVBase::runLLVMToSPIRV(Module &Mod) { M = &Mod; + initialize(Mod); CG = std::make_unique(Mod); Ctx = &M->getContext(); DbgTran->setModule(M); @@ -533,8 +535,10 @@ SPIRVType *LLVMToSPIRVBase::transPointerType(Type *ET, unsigned AddrSpc) { } if (STName.startswith(kSPR2TypeName::ImagePrefix)) { assert(AddrSpc == SPIRAS_Global); - Type *ImageTy = adaptSPIRVImageType(M, ST); - return SaveType(transPointerType(ImageTy, SPIRAS_Global)); + Type *ImageTy = + adjustImageType(TypedPointerType::get(ST, AddrSpc), + kSPIRVTypeName::Image, kSPIRVTypeName::Image); + return SaveType(transType(ImageTy)); } if (STName == kSPR2TypeName::Sampler) return SaveType(transSPIRVOpaqueType( @@ -702,19 +706,17 @@ SPIRVType *LLVMToSPIRVBase::transSPIRVOpaqueType(StringRef STName, return SaveType(BM->addImageType( SampledT, Desc, static_cast(Ops[6]))); } else if (TN == kSPIRVTypeName::SampledImg) { - return SaveType( - BM->addSampledImageType(static_cast(transPointerType( - getSPIRVStructTypeByChangeBaseTypeName( - M, ST, kSPIRVTypeName::SampledImg, kSPIRVTypeName::Image), - SPIRAS_Global)))); + return SaveType(BM->addSampledImageType(static_cast( + transType(adjustImageType(TypedPointerType::get(ST, SPIRAS_Global), + kSPIRVTypeName::SampledImg, + kSPIRVTypeName::Image))))); } else if (TN == kSPIRVTypeName::VmeImageINTEL) { // This type is the 
same as SampledImageType, but consumed by Subgroup AVC // Intel extension instructions. - return SaveType( - BM->addVmeImageINTELType(static_cast(transPointerType( - getSPIRVStructTypeByChangeBaseTypeName( - M, ST, kSPIRVTypeName::VmeImageINTEL, kSPIRVTypeName::Image), - SPIRAS_Global)))); + return SaveType(BM->addVmeImageINTELType(static_cast( + transType(adjustImageType(TypedPointerType::get(ST, SPIRAS_Global), + kSPIRVTypeName::VmeImageINTEL, + kSPIRVTypeName::Image))))); } else if (TN == kSPIRVTypeName::Sampler) return SaveType(BM->addSamplerType()); else if (TN == kSPIRVTypeName::DeviceEvent) @@ -739,22 +741,15 @@ SPIRVType *LLVMToSPIRVBase::transScavengedType(Value *V) { SPIRVType *RT = transType(F->getReturnType()); std::vector PT; for (Argument &Arg : F->args()) { - auto TypePair = - OCLTypeToSPIRVPtr->getAdaptedArgumentType(F, Arg.getArgNo()); - Type *Ty = TypePair.first; - Type *PointeeTy = TypePair.second; + Type *Ty = OCLTypeToSPIRVPtr->getAdaptedArgumentType(F, Arg.getArgNo()); if (!Ty) { Ty = Arg.getType(); if (Ty->isPointerTy()) - PointeeTy = - Scavenger->getArgumentPointerElementType(F, Arg.getArgNo()); + Ty = TypedPointerType::get( + Scavenger->getArgumentPointerElementType(F, Arg.getArgNo()), + Ty->getPointerAddressSpace()); } - SPIRVType *TransTy = nullptr; - if (Ty->isPointerTy()) - TransTy = transPointerType(PointeeTy, Ty->getPointerAddressSpace()); - else - TransTy = transType(Ty); - PT.push_back(TransTy); + PT.push_back(transType(Ty)); } return getSPIRVFunctionType(RT, PT); @@ -849,8 +844,10 @@ SPIRVFunction *LLVMToSPIRVBase::transFunctionDecl(Function *F) { BA->addAttr(FunctionParameterAttributeNoCapture); if (I->hasStructRetAttr()) BA->addAttr(FunctionParameterAttributeSret); - if (I->onlyReadsMemory()) + if (Attrs.hasParamAttr(ArgNo, Attribute::ReadOnly)) BA->addAttr(FunctionParameterAttributeNoWrite); + if (Attrs.hasParamAttr(ArgNo, Attribute::ReadNone)) + BA->addAttr(FunctionParameterAttributeNoReadWrite); if (Attrs.hasParamAttr(ArgNo, 
Attribute::ZExt)) BA->addAttr(FunctionParameterAttributeZext); if (Attrs.hasParamAttr(ArgNo, Attribute::SExt)) @@ -5057,15 +5054,13 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI, // for this call, because there is no support for type corresponding to // OpTypeSampledImage. So, in this case, we create the required type here. Value *Image = CI->getArgOperand(0); - SmallVector ParamTys; - getParameterTypes(CI, ParamTys); - Type *ImageTy = adaptSPIRVImageType(M, ParamTys[0]); - Type *SampledImgTy = getSPIRVStructTypeByChangeBaseTypeName( - M, ImageTy, kSPIRVTypeName::Image, kSPIRVTypeName::SampledImg); + Type *SampledImgTy = + adjustImageType(getCallValueType(CI, 0), kSPIRVTypeName::Image, + kSPIRVTypeName::SampledImg); Value *Sampler = CI->getArgOperand(1); - return BM->addSampledImageInst( - transPointerType(SampledImgTy, SPIRAS_Global), transValue(Image, BB), - transValue(Sampler, BB), BB); + return BM->addSampledImageInst(transType(SampledImgTy), + transValue(Image, BB), + transValue(Sampler, BB), BB); } case OpFixedSqrtINTEL: case OpFixedRecipINTEL: diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.h b/llvm-spirv/lib/SPIRV/SPIRVWriter.h index 0e5cd5ae6f8b7..3c2669a847e97 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.h +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.h @@ -46,6 +46,7 @@ #include "OCLTypeToSPIRV.h" #include "OCLUtil.h" #include "SPIRVBasicBlock.h" +#include "SPIRVBuiltinHelper.h" #include "SPIRVEntry.h" #include "SPIRVEnum.h" #include "SPIRVFunction.h" @@ -68,7 +69,7 @@ using namespace OCLUtil; namespace SPIRV { -class LLVMToSPIRVBase { +class LLVMToSPIRVBase : protected BuiltinCallHelper { public: LLVMToSPIRVBase(SPIRVModule *SMod); bool runLLVMToSPIRV(Module &Mod); diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 2ffd4c3aa6526..6af437b9c99ea 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ 
-3561,6 +3561,64 @@ class SPIRVMaskedScatterINTELInst _SPIRV_OP(MaskedGather, true, 7) _SPIRV_OP(MaskedScatter, false, 5) #undef _SPIRV_OP + +template +class SPIRVTensorFloat32ConversionINTELInstBase : public SPIRVUnaryInst { +protected: + SPIRVCapVec getRequiredCapability() const override { + return getVec(internal::CapabilityTensorFloat32ConversionINTEL); + } + + llvm::Optional getRequiredExtension() const override { + return ExtensionID::SPV_INTEL_tensor_float32_conversion; + } + + void validate() const override { + SPIRVUnaryInst::validate(); + + SPIRVType *ResCompTy = this->getType(); + SPIRVWord ResCompCount = 1; + if (ResCompTy->isTypeVector()) { + ResCompCount = ResCompTy->getVectorComponentCount(); + ResCompTy = ResCompTy->getVectorComponentType(); + } + + // validate is a const method, whilst getOperand is non-const method + // because it may call a method of class Module that may modify LiteralMap + // of Module field. That modification is not impacting validate method for + // these instructions, so const_cast is safe here. 
+ using SPVTF32ConvTy = SPIRVTensorFloat32ConversionINTELInstBase; + SPIRVValue *Input = const_cast(this)->getOperand(0); + + SPIRVType *InCompTy = Input->getType(); + SPIRVWord InCompCount = 1; + if (InCompTy->isTypeVector()) { + InCompCount = InCompTy->getVectorComponentCount(); + InCompTy = InCompTy->getVectorComponentType(); + } + + auto InstName = OpCodeNameMap::map(OC); + SPIRVErrorLog &SPVErrLog = this->getModule()->getErrorLog(); + + SPVErrLog.checkError( + ResCompTy->isTypeFloat(32), SPIRVEC_InvalidInstruction, + InstName + "\nResult value must be a scalar or vector of floating-point" + " 32-bit type\n"); + SPVErrLog.checkError(InCompTy->isTypeFloat(32), SPIRVEC_InvalidInstruction, + InstName + + "\nInput value must be a scalar or vector of " + "floating-point 32-bit type\n"); + SPVErrLog.checkError( + ResCompCount == InCompCount, SPIRVEC_InvalidInstruction, + InstName + "\nInput type must have the same number of components as " + "result type\n"); + } +}; + +#define _SPIRV_OP(x) \ + typedef SPIRVTensorFloat32ConversionINTELInstBase SPIRV##x; +_SPIRV_OP(ConvertFToTF32INTEL) +#undef _SPIRV_OP } // namespace SPIRV #endif // SPIRV_LIBSPIRV_SPIRVINSTRUCTION_H diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index 4520a5a4602a9..d098ff0d8b244 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -620,6 +620,8 @@ template <> inline void SPIRVMap::init() { "NonConstantAddrspacePrintfINTEL"); add(internal::CapabilityComplexFloatMulDivINTEL, "ComplexFloatMulDivINTEL"); add(internal::CapabilityMaskedGatherScatterINTEL, "MaskedGatherScatterINTEL"); + add(internal::CapabilityTensorFloat32ConversionINTEL, + "TensorFloat32ConversionINTEL"); } SPIRV_DEF_NAMEMAP(Capability, SPIRVCapabilityNameMap) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h index 
0ed0d855d5e61..9d8765b5aee5e 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h @@ -15,3 +15,4 @@ _SPIRV_OP_INTERNAL(ComplexFMulINTEL, internal::ComplexFMulINTEL) _SPIRV_OP_INTERNAL(ComplexFDivINTEL, internal::ComplexFDivINTEL) _SPIRV_OP_INTERNAL(MaskedGatherINTEL, internal::OpMaskedGatherINTEL) _SPIRV_OP_INTERNAL(MaskedScatterINTEL, internal::OpMaskedScatterINTEL) +_SPIRV_OP_INTERNAL(ConvertFToTF32INTEL, internal::ConvertFToTF32INTEL) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp b/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp index a08f59c37b5d0..3220c6ebe5092 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp +++ b/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp @@ -46,6 +46,7 @@ enum InternalOp { IOpJointMatrixWorkItemLengthINTEL = 6410, IOpComplexFMulINTEL = 6415, IOpComplexFDivINTEL = 6416, + IOpConvertFToTF32INTEL = 6426, IOpMaskedGatherINTEL = 6428, IOpMaskedScatterINTEL = 6429, IOpPrev = OpMax - 2, @@ -81,6 +82,7 @@ enum InternalCapability { ICapGlobalVariableDecorationsINTEL = 6146, ICapabilityNonConstantAddrspacePrintfINTEL = 6411, ICapabilityComplexFloatMulDivINTEL = 6414, + ICapabilityTensorFloat32ConversionINTEL = 6425, ICapabilityMaskedGatherScatterINTEL = 6427 }; @@ -133,6 +135,9 @@ _SPIRV_OP(Op, ComplexFDivINTEL) _SPIRV_OP(Capability, MaskedGatherScatterINTEL) _SPIRV_OP(Op, MaskedGatherINTEL) _SPIRV_OP(Op, MaskedScatterINTEL) + +_SPIRV_OP(Capability, TensorFloat32ConversionINTEL) +_SPIRV_OP(Op, ConvertFToTF32INTEL) #undef _SPIRV_OP constexpr Op OpForward = static_cast(IOpForward); diff --git a/llvm-spirv/test/extensions/INTEL/SPV_INTEL_tensor_float32_conversion/convert_tensor_float32.ll b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_tensor_float32_conversion/convert_tensor_float32.ll new file mode 100644 index 0000000000000..1f0270694178f --- /dev/null +++ 
b/llvm-spirv/test/extensions/INTEL/SPV_INTEL_tensor_float32_conversion/convert_tensor_float32.ll @@ -0,0 +1,50 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv --spirv-ext=+SPV_INTEL_tensor_float32_conversion +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv %t.spv -o %t.rev.bc -r -emit-opaque-pointers --spirv-target-env=SPV-IR +; RUN: llvm-dis %t.rev.bc -o %t.rev.ll +; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM + +; RUN: not llvm-spirv %t.bc 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; CHECK-ERROR: RequiresExtension: Feature requires the following SPIR-V extension: +; CHECK-ERROR-NEXT: SPV_INTEL_tensor_float32_conversion + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +; CHECK-SPIRV: Capability TensorFloat32ConversionINTEL +; CHECK-SPIRV: Extension "SPV_INTEL_tensor_float32_conversion" +; CHECK-SPIRV: TypeFloat [[#FP32Ty:]] 32 +; CHECK-SPIRV: TypeVector [[#FP32v8Ty:]] [[#FP32Ty]] 8 +; CHECK-SPIRV: Constant [[#FP32Ty]] [[#CONST:]] 1065353216 + +; CHECK-SPIRV: FunctionParameter [[#FP32Ty]] [[FP32ValId:.*]] +; CHECK-SPIRV: FunctionParameter [[#FP32v8Ty]] [[FP32v8ValId:.*]] + +; CHECK-SPIRV: ConvertFToTF32INTEL [[#FP32Ty]] [[#]] [[FP32ValId]] +; CHECK-SPIRV: ConvertFToTF32INTEL [[#FP32v8Ty]] [[#]] [[FP32v8ValId]] +; CHECK-SPIRV: ConvertFToTF32INTEL [[#FP32Ty]] [[#]] [[#CONST]] + +; CHECK-LLVM: call spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float +; CHECK-LLVM: call spir_func <8 x float> @_Z27__spirv_ConvertFToTF32INTELDv8_f(<8 x float> +; CHECK-LLVM: call spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float 1.000000e+00) + +define spir_func void @_Z2opffv8(float %a, <8 x float> %in) { + %1 = tail call spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float %a) + %2 = tail call spir_func <8 x float> @_Z27__spirv_ConvertFToTF32INTELDv8_f(<8 
x float> %in) + %3 = tail call spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float 1.000000e+00) + ret void +} + +declare spir_func float @_Z27__spirv_ConvertFToTF32INTELf(float) + +declare spir_func <8 x float> @_Z27__spirv_ConvertFToTF32INTELDv8_f(<8 x float>) + +!opencl.spir.version = !{!0} +!spirv.Source = !{!1} +!llvm.ident = !{!2} + +!0 = !{i32 1, i32 2} +!1 = !{i32 4, i32 100000} +!2 = !{!"clang version 16.0.0"} diff --git a/llvm-spirv/test/transcoding/OpGenericPtrMemSemantics.ll b/llvm-spirv/test/transcoding/OpGenericPtrMemSemantics.ll index 8b7545e7aedff..9da8e260d448d 100644 --- a/llvm-spirv/test/transcoding/OpGenericPtrMemSemantics.ll +++ b/llvm-spirv/test/transcoding/OpGenericPtrMemSemantics.ll @@ -24,7 +24,7 @@ target triple = "spir-unknown-unknown" @gint = addrspace(1) global i32 1, align 4 -; Function Attrs: nounwind readnone +; Function Attrs: nounwind define spir_func i32 @isFenceValid(i32 %fence) #0 { entry: %switch = icmp ult i32 %fence, 4 @@ -66,7 +66,7 @@ entry: declare spir_func i32 @_Z13get_global_idj(i32) #2 -attributes #0 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" 
"unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { nounwind } diff --git a/llvm-spirv/test/transcoding/OpImageSampleExplicitLod.ll b/llvm-spirv/test/transcoding/OpImageSampleExplicitLod.ll index c214ff9dceb5e..b41b7d07c455e 100644 --- a/llvm-spirv/test/transcoding/OpImageSampleExplicitLod.ll +++ b/llvm-spirv/test/transcoding/OpImageSampleExplicitLod.ll @@ -43,14 +43,14 @@ entry: ; Function Attrs: nounwind declare spir_func float @_Z11read_imagef20ocl_image2d_depth_ro11ocl_samplerDv2_i(%opencl.image2d_depth_ro_t addrspace(1)*, i32, <2 x i32>) #0 -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i32 @_Z13get_global_idj(i32) #1 ; Function Attrs: nounwind declare spir_func <2 x i32> @_Z13get_image_dim20ocl_image2d_depth_ro(%opencl.image2d_depth_ro_t addrspace(1)*) #0 attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } +attributes #1 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/OpSwitch32.ll b/llvm-spirv/test/transcoding/OpSwitch32.ll index 1dd9337fd996f..718dbe29fe25e 100644 --- a/llvm-spirv/test/transcoding/OpSwitch32.ll +++ b/llvm-spirv/test/transcoding/OpSwitch32.ll @@ -75,12 +75,12 @@ sw.epilog: ; preds = %entry, %sw.bb1, %sw ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } +attributes #1 = { nounwind 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/OpSwitch64.ll b/llvm-spirv/test/transcoding/OpSwitch64.ll index 54d396627d0eb..91dfc30536630 100644 --- a/llvm-spirv/test/transcoding/OpSwitch64.ll +++ b/llvm-spirv/test/transcoding/OpSwitch64.ll @@ -86,12 +86,12 @@ sw.epilog: ; preds = %entry, %sw.bb3, %sw ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/bitcast.ll b/llvm-spirv/test/transcoding/bitcast.ll index 1ab279f7b539e..0e6aa95ca295f 100644 --- a/llvm-spirv/test/transcoding/bitcast.ll +++ b/llvm-spirv/test/transcoding/bitcast.ll @@ -26,12 +26,12 @@ entry: ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/builtin_calls.ll b/llvm-spirv/test/transcoding/builtin_calls.ll index 8c9d7b6aa9c2d..a5d6c1eae77ea 100644 --- a/llvm-spirv/test/transcoding/builtin_calls.ll +++ b/llvm-spirv/test/transcoding/builtin_calls.ll @@ -16,7 +16,7 @@ target triple = "spir-unknown-unknown" ; CHECK-SPIRV: Variable {{[0-9]+}} [[Id:[0-9]+]] ; CHECK-SPIRV: Variable {{[0-9]+}} [[Id:[0-9]+]] -; Function Attrs: nounwind readnone +; Function Attrs: nounwind define spir_kernel void @f() #0 !kernel_arg_addr_space !0 !kernel_arg_access_qual !0 !kernel_arg_type !0 !kernel_arg_base_type !0 !kernel_arg_type_qual !0 { entry: %0 = call spir_func i32 @_Z29__spirv_BuiltInGlobalLinearIdv() diff --git a/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll b/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll index 07881972b4e28..c99098fa33908 100644 --- a/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll +++ b/llvm-spirv/test/transcoding/builtin_function_readnone_attr.ll @@ -1,13 +1,23 @@ ; RUN: llvm-as %s -o %t.bc ; RUN: llvm-spirv %t.bc -o %t.spv +; RUN: llvm-spirv %t.spv 
-to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ; RUN: llvm-spirv -r -emit-opaque-pointers %t.spv -o %t.bc ; RUN: llvm-dis < %t.bc | FileCheck %s --check-prefix=CHECK-LLVM +; CHECK-SPIRV: Name [[#A:]] "a" +; CHECK-SPIRV: Name [[#B:]] "b" +; CHECK-SPIRV: Decorate [[#A]] FuncParamAttr 5 +; CHECK-SPIRV: Decorate [[#A]] FuncParamAttr 6 +; CHECK-SPIRV: Decorate [[#B]] FuncParamAttr 7 + +; CHECK-LLVM: {{.*}}void @test_builtin_readnone(ptr nocapture readonly %{{.*}}, ptr nocapture readnone %{{.*}}) + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "spir-unknown-unknown" ; Function Attrs: convergent nofree norecurse nounwind uwtable -define dso_local spir_kernel void @test_builtin_readnone(double* nocapture readonly %a, double* nocapture %b) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +define dso_local spir_kernel void @test_builtin_readnone(double* nocapture readonly %a, double* nocapture readnone %b) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { entry: %0 = load double, double* %a, align 8, !tbaa !7 %call = tail call double @_Z3expd(double %0) #2 @@ -18,18 +28,15 @@ entry: ret void } -; Function Attrs: convergent nounwind readnone -; CHECK-LLVM: declare{{.*}}@_Z3expd{{.*}}#[[#Attrs:]] +; Function Attrs: convergent nounwind declare dso_local double @_Z3expd(double) local_unnamed_addr #1 -; Function Attrs: convergent nounwind readnone -; CHECK-LLVM: declare{{.*}}@_Z3cosd{{.*}}#[[#Attrs]] +; Function Attrs: convergent nounwind declare dso_local double @_Z3cosd(double) local_unnamed_addr #1 attributes #0 = { convergent nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } -; CHECK-LLVM: attributes #[[#Attrs]] {{.*}} readnone -attributes #1 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind readnone } +attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { convergent nounwind } !llvm.module.flags = !{!0} !opencl.ocl.version = !{!1} diff --git a/llvm-spirv/test/transcoding/builtin_vars_arithmetics.ll b/llvm-spirv/test/transcoding/builtin_vars_arithmetics.ll index b36298489f62b..3b50852f54390 100644 --- a/llvm-spirv/test/transcoding/builtin_vars_arithmetics.ll +++ b/llvm-spirv/test/transcoding/builtin_vars_arithmetics.ll @@ -123,7 +123,7 @@ entry: attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="test.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } -; CHECK-LLVM-OCL: attributes #1 = { nounwind readnone willreturn } +; CHECK-LLVM-OCL: attributes #1 = { nounwind willreturn } !llvm.module.flags = !{!0} !opencl.spir.version = !{!1} diff --git a/llvm-spirv/test/transcoding/isequal.ll b/llvm-spirv/test/transcoding/isequal.ll index a49f2fe942e55..f1e363ddc7d12 100644 --- a/llvm-spirv/test/transcoding/isequal.ll +++ b/llvm-spirv/test/transcoding/isequal.ll @@ -30,15 +30,15 @@ entry: ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func <8 x i32> @_Z7isequalDv8_fDv8_f(<8 x float>, <8 x float>) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } +attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !opencl.spir.version = !{!6} diff --git a/llvm-spirv/test/transcoding/unreachable.ll b/llvm-spirv/test/transcoding/unreachable.ll 
index 6193648f78928..078681fbd6a05 100644 --- a/llvm-spirv/test/transcoding/unreachable.ll +++ b/llvm-spirv/test/transcoding/unreachable.ll @@ -30,11 +30,11 @@ define spir_kernel void @unreachable_simple(i32 addrspace(1)* nocapture %in, i32 ret void } -; Function Attrs: nounwind readnone +; Function Attrs: nounwind declare spir_func i64 @_Z13get_global_idj(i32) #1 attributes #0 = { nounwind } -attributes #1 = { nounwind readnone } +attributes #1 = { nounwind } !opencl.enable.FP_CONTRACT = !{} !spirv.Source = !{!6} diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 30673d8b6a786..1de05b20bdb6b 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -839,7 +839,10 @@ include(config-ix) if("${LLVM_HOST_TRIPLE}" MATCHES "^powerpc64-ibm-aix") string(REGEX REPLACE "^powerpc64" "powerpc" LLVM_DEFAULT_TARGET_TRIPLE_default "${LLVM_HOST_TRIPLE}") else() - set(LLVM_DEFAULT_TARGET_TRIPLE_default "${LLVM_HOST_TRIPLE}") + # Only set default triple when native target is enabled. + if (LLVM_NATIVE_TARGET) + set(LLVM_DEFAULT_TARGET_TRIPLE_default "${LLVM_HOST_TRIPLE}") + endif() endif() set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE_default}" CACHE STRING diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 74ffaf7bfdec7..15a7d78b3ac43 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -71,7 +71,7 @@ if(APPLE) CHECK_C_SOURCE_COMPILES(" static const char *__crashreporter_info__ = 0; asm(\".desc ___crashreporter_info__, 0x10\"); - int main() { return 0; }" + int main(void) { return 0; }" HAVE_CRASHREPORTER_INFO) endif() @@ -622,15 +622,17 @@ if(CMAKE_GENERATOR MATCHES "Ninja" AND endif() if(CMAKE_HOST_APPLE AND APPLE) - if(NOT CMAKE_XCRUN) - find_program(CMAKE_XCRUN NAMES xcrun) - endif() - if(CMAKE_XCRUN) - execute_process(COMMAND ${CMAKE_XCRUN} -find ld - OUTPUT_VARIABLE LD64_EXECUTABLE - OUTPUT_STRIP_TRAILING_WHITESPACE) - else() - find_program(LD64_EXECUTABLE NAMES ld DOC "The ld64 linker") + if(NOT 
LD64_EXECUTABLE) + if(NOT CMAKE_XCRUN) + find_program(CMAKE_XCRUN NAMES xcrun) + endif() + if(CMAKE_XCRUN) + execute_process(COMMAND ${CMAKE_XCRUN} -find ld + OUTPUT_VARIABLE LD64_EXECUTABLE + OUTPUT_STRIP_TRAILING_WHITESPACE) + else() + find_program(LD64_EXECUTABLE NAMES ld DOC "The ld64 linker") + endif() endif() if(LD64_EXECUTABLE) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 30ac0040e5650..428a22422e288 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -219,6 +219,7 @@ if (NOT DEFINED LLVM_LINKER_DETECTED AND NOT WIN32) else() if("${stdout}" MATCHES "^mold") set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") + set(LLVM_LINKER_IS_MOLD YES CACHE INTERNAL "") message(STATUS "Linker detection: mold") elseif("${stdout}" MATCHES "GNU gold") set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") diff --git a/llvm/cmake/modules/CheckAtomic.cmake b/llvm/cmake/modules/CheckAtomic.cmake index 3c5ba72993a3a..f11cadf39ff6b 100644 --- a/llvm/cmake/modules/CheckAtomic.cmake +++ b/llvm/cmake/modules/CheckAtomic.cmake @@ -82,6 +82,19 @@ elseif(LLVM_COMPILER_IS_GCC_COMPATIBLE OR CMAKE_CXX_COMPILER_ID MATCHES "XL") endif() endif() +# Set variable LLVM_ATOMIC_LIB specifying flags for linking against libatomic. +if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB) + # Use options --push-state, --as-needed and --pop-state if linker is known to support them. + # Use single option -Wl of compiler driver to avoid incorrect re-ordering of options by CMake. + if(LLVM_LINKER_IS_GNULD OR LLVM_LINKER_IS_GOLD OR LLVM_LINKER_IS_LLD OR LLVM_LINKER_IS_MOLD) + set(LLVM_ATOMIC_LIB "-Wl,--push-state,--as-needed,-latomic,--pop-state") + else() + set(LLVM_ATOMIC_LIB "-latomic") + endif() +else() + set(LLVM_ATOMIC_LIB) +endif() + ## TODO: This define is only used for the legacy atomic operations in ## llvm's Atomic.h, which should be replaced. Other code simply ## assumes C++11 works. 
diff --git a/llvm/cmake/modules/FindFFI.cmake b/llvm/cmake/modules/FindFFI.cmake index b0d859af89598..a493a89d63017 100644 --- a/llvm/cmake/modules/FindFFI.cmake +++ b/llvm/cmake/modules/FindFFI.cmake @@ -45,7 +45,7 @@ if(FFI_LIBRARIES) struct ffi_cif; typedef struct ffi_cif ffi_cif; void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue); - int main() { ffi_call(0, 0, 0, 0); }" + int main(void) { ffi_call(0, 0, 0, 0); }" HAVE_FFI_CALL) cmake_pop_check_state() endif() diff --git a/llvm/cmake/modules/FindTerminfo.cmake b/llvm/cmake/modules/FindTerminfo.cmake index 65edb80fa69a8..eef1f95853eb2 100644 --- a/llvm/cmake/modules/FindTerminfo.cmake +++ b/llvm/cmake/modules/FindTerminfo.cmake @@ -20,7 +20,7 @@ if(Terminfo_LIBRARIES) list(APPEND CMAKE_REQUIRED_LIBRARIES ${Terminfo_LIBRARIES}) check_c_source_compiles(" int setupterm(char *term, int filedes, int *errret); - int main() { return setupterm(0, 0, 0); }" + int main(void) { return setupterm(0, 0, 0); }" Terminfo_LINKABLE) cmake_pop_check_state() endif() diff --git a/llvm/cmake/modules/FindZ3.cmake b/llvm/cmake/modules/FindZ3.cmake index afb2c31756419..72fb5a96a52b9 100644 --- a/llvm/cmake/modules/FindZ3.cmake +++ b/llvm/cmake/modules/FindZ3.cmake @@ -18,8 +18,9 @@ function(check_z3_version z3_include z3_lib) # The program that will be executed to print Z3's version. file(WRITE ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/testz3.cpp "#include + #include #include - int main() { + int main(void) { unsigned int major, minor, build, rev; Z3_get_version(&major, &minor, &build, &rev); printf(\"%u.%u.%u\", major, minor, build); diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 7828e8a1627f2..abf10df855047 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -779,7 +779,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL)) # line is also a // comment. 
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment") - CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}" + CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main(void) {return 0;}" C_WCOMMENT_ALLOWS_LINE_WRAP) set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP) diff --git a/llvm/docs/AssignmentTracking.md b/llvm/docs/AssignmentTracking.md new file mode 100644 index 0000000000000..dfb5add3d4f74 --- /dev/null +++ b/llvm/docs/AssignmentTracking.md @@ -0,0 +1,233 @@ +# Debug Info Assignment Tracking + +Assignment Tracking is an alternative technique for tracking variable location +debug info through optimisations in LLVM. It provides accurate variable +locations for assignments where a local variable (or a field of one) is the +LHS. In rare and complicated circumstances indirect assignments might be +optimized away without being tracked, but otherwise we make our best effort to +track all variable locations. + +The core idea is to track more information about source assignments in order +and preserve enough information to be able to defer decisions about whether to +use non-memory locations (register, constant) or memory locations until after +middle end optimisations have run. This is in opposition to using +`llvm.dbg.declare` and `llvm.dbg.value`, which is to make the decision for most +variables early on, which can result in suboptimal variable locations that may +be either incorrect or incomplete. + +A secondary goal of assignment tracking is to cause minimal additional work for +LLVM pass writers, and minimal disruption to LLVM in general. + +## Status and usage + +**Status**: Experimental work in progress. Enabling is strongly advised against +except for development and testing. 
+ +**Enable in Clang**: `-Xclang -fexperimental-assignment-tracking` + +**Enable in LLVM tools**: `-experimental-assignment-tracking` + +## Design and implementation + +### Assignment markers: `llvm.dbg.assign` + +`llvm.dbg.value`, a conventional debug intrinsic, marks out a position in the +IR where a variable takes a particular value. Similarly, Assignment Tracking +marks out the position of assignments with a new intrinsic called +`llvm.dbg.assign`. + +In order to know where in IR it is appropriate to use a memory location for a +variable, each assignment marker must in some way refer to the store, if any +(or multiple!), that performs the assignment. That way, the position of the +store and marker can be considered together when making that choice. Another +important benefit of referring to the store is that we can then build a two-way +mapping of stores<->markers that can be used to find markers that need to be +updated when stores are modified. + +An `llvm.dbg.assign` marker that is not linked to any instruction signals that +the store that performed the assignment has been optimised out, and therefore +the memory location will not be valid for at least some part of the program. + +Here's the `llvm.dbg.assign` signature. Each parameter is wrapped in +`MetadataAsValue`, and `Value *` type parameters are first wrapped in +`ValueAsMetadata`: + +``` +void @llvm.dbg.assign(Value *Value, + DIExpression *ValueExpression, + DILocalVariable *Variable, + DIAssignID *ID, + Value *Address, + DIExpression *AddressExpression) +``` + +The first three parameters look and behave like an `llvm.dbg.value`. `ID` is a +reference to a store (see next section). `Address` is the destination address +of the store and it is modified by `AddressExpression`. LLVM currently encodes +variable fragment information in `DIExpression`s, so as an implementation quirk +the `FragmentInfo` for `Variable` is contained within `ValueExpression` only. 
+ +The formal LLVM-IR signature is: +``` +void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) +``` + +### Instruction link: `DIAssignID` + +`DIAssignID` metadata is the mechanism that is currently used to encode the +store<->marker link. The metadata node has no operands and all instances are +`distinct`; equality is checked for by comparing addresses. + +`llvm.dbg.assign` intrinsics use a `DIAssignID` metadata node instance as an +operand. This way it refers to any store-like instruction that has the same +`DIAssignID` attachment. E.g. For this test.cpp, + +``` +int fun(int a) { + return a; +} +``` +compiled without optimisations: +``` +$ clang++ test.cpp -o test.ll -emit-llvm -S -g -O0 -Xclang -fexperimental-assignment-tracking +``` +we get: +``` +define dso_local noundef i32 @_Z3funi(i32 noundef %a) #0 !dbg !8 { +entry: + %a.addr = alloca i32, align 4, !DIAssignID !13 + call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(), metadata !13, metadata i32* %a.addr, metadata !DIExpression()), !dbg !15 + store i32 %a, i32* %a.addr, align 4, !DIAssignID !16 + call void @llvm.dbg.assign(metadata i32 %a, metadata !14, metadata !DIExpression(), metadata !16, metadata i32* %a.addr, metadata !DIExpression()), !dbg !15 + %0 = load i32, i32* %a.addr, align 4, !dbg !17 + ret i32 %0, !dbg !18 +} + +... +!13 = distinct !DIAssignID() +!14 = !DILocalVariable(name: "a", ...) +... +!16 = distinct !DIAssignID() +``` + +The first `llvm.dbg.assign` refers to the `alloca` through `!DIAssignID !13`, +and the second refers to the `store` through `!DIAssignID !16`. + +### Store-like instructions + +In the absence of a linked `llvm.dbg.assign`, a store to an address that is +known to be the backing storage for a variable is considered to represent an +assignment to that variable. 
+ +This gives us a safe fall-back in cases where `llvm.dbg.assign` intrinsics have +been deleted, the `DIAssignID` attachment on the store has been dropped, or the +optimiser has made a once-indirect store (not tracked with Assignment Tracking) +direct. + +### Middle-end: Considerations for pass-writers + +#### Non-debug instruction updates + +**Cloning** an instruction: nothing new to do. Cloning automatically clones a +`DIAssignID` attachment. Multiple instructions may have the same `DIAssignID` +attachment. In this case, the assignment is considered to take place in +multiple positions in the program. + +**Moving** a non-debug instruction: nothing new to do. Instructions linked to an +`llvm.dbg.assign` have their initial IR position marked by the position of the +`llvm.dbg.assign`. + +**Deleting** a non-debug instruction: nothing new to do. Simple DSE does not +require any change; it’s safe to delete an instruction with a `DIAssignID` +attachment. An `llvm.dbg.assign` that uses a `DIAssignID` that is not attached +to any instruction indicates that the memory location isn’t valid. + +**Merging** stores: In many cases no change is required as `DIAssignID` +attachments are automatically merged if `combineMetadata` is called. One way or +another, the `DIAssignID` attachments must be merged such that the new store +becomes linked to all the `llvm.dbg.assign` intrinsics that the merged stores +were linked to. This can be achieved simply by calling a helper function +`Instruction::mergeDIAssignID`. + +**Inlining** stores: As stores are inlined we generate `llvm.dbg.assign` +intrinsics and `DIAssignID` attachments as if the stores represent source +assignments, just like in the frontend. This isn’t perfect, as stores may have +been moved, modified or deleted before inlining, but it does at least keep the +information about the variable correct within the non-inlined scope. 
+ +**Splitting** stores: SROA and passes that split stores treat `llvm.dbg.assign` +intrinsics similarly to `llvm.dbg.declare` intrinsics. Clone the +`llvm.dbg.assign` intrinsics linked to the store, update the FragmentInfo in +the `ValueExpression`, and give the split stores (and cloned intrinsics) new +`DIAssignID` attachments each. In other words, treat the split stores as +separate assignments. For partial DSE (e.g. shortening a memset), we do the +same except that `llvm.dbg.assign` for the dead fragment gets an `Undef` +`Address`. + +**Promoting** allocas and store/loads: `llvm.dbg.assign` intrinsics implicitly +describe joined values in memory locations at CFG joins, but this is not +necessarily the case after promoting (or partially promoting) the +variable. Passes that promote variables are responsible for inserting +`llvm.dbg.assign` intrinsics after the resultant PHIs generated during +promotion. `mem2reg` already has to do this (with `llvm.dbg.value`) for +`llvm.dbg.declare`s. Where a store has no linked intrinsic, the store is +assumed to represent an assignment for variables stored at the destination +address. + +#### Debug intrinsic updates + +**Moving** a debug intrinsic: avoid moving `llvm.dbg.assign` intrinsics where +possible, as they represent a source-level assignment, whose position in the +program should not be affected by optimization passes. + +**Deleting** a debug intrinsic: Nothing new to do. Just like for conventional +debug intrinsics, unless it is unreachable, it’s almost always incorrect to +delete a `llvm.dbg.assign` intrinsic. + +### Lowering `llvm.dbg.assign` to MIR + +To begin with only SelectionDAG ISel will be supported. `llvm.dbg.assign` +intrinsics are lowered to MIR `DBG_INSTR_REF` instructions. Before this happens +we need to decide where it is appropriate to use memory locations and where we +must use a non-memory location (or no location) for each variable. 
In order to +make those decisions we run a standard fixed-point dataflow analysis that makes +the choice at each instruction, iteratively joining the results for each block. + +### TODO list + +As this is an experimental work in progress, there are some items we still need +to tackle: + +* LLVM is trying to replace usage of `Undef` with `Poison`. Use `Poison` rather + than `Undef` as the sentinel to denote "unknown location" for the address. See + D133293. This will be unnecessary if the address can be removed, as described + below. + +* The system expects locals to be backed by a local alloca. This isn't always + the case - sometimes a pointer to storage is passed into a function + (e.g. sret, byval). We need to be able to handle those cases. See + llvm/test/DebugInfo/Generic/assignment-tracking/track-assignments.ll and + clang/test/CodeGen/assignment-tracking/assignment-tracking.cpp for examples. + +* `trackAssignments` doesn't yet work for variables that have their + `llvm.dbg.declare` location modified by a `DIExpression`, e.g. when the + address of the variable is itself stored in an `alloca` with the + `llvm.dbg.declare` using `DIExpression(DW_OP_deref)`. See `indirectReturn` in + llvm/test/DebugInfo/Generic/assignment-tracking/track-assignments.ll and in + clang/test/CodeGen/assignment-tracking/assignment-tracking.cpp for an + example. + +* In order to solve the first bullet-point we need to be able to specify that a + memory location is available without using a `DIAssignID`. This is because + the storage address is not computed by an instruction (it's an argument + value) and therefore we have nowhere to put the metadata attachment.
To solve + this we probably need another marker intrinsic to denote "the variable's + stack home is X address" - similar to `llvm.dbg.declare` and `llvm.dbg.addr` + except that it needs to compose with `llvm.dbg.assign` intrinsics such that + the stack home address is only selected as a location for the variable when + the `llvm.dbg.assign` intrinsics agree it should be. + +* Given the above (a special "the stack home is X" intrinsic), and the fact + that we can only track assignments with fixed offsets and sizes, I think we + can probably get rid of the address and address-expression part, since it + will always be computable with the info we have. diff --git a/llvm/docs/CommandGuide/llvm-config.rst b/llvm/docs/CommandGuide/llvm-config.rst index 2dddbcc53dc6a..63658d0d90452 100644 --- a/llvm/docs/CommandGuide/llvm-config.rst +++ b/llvm/docs/CommandGuide/llvm-config.rst @@ -130,10 +130,6 @@ OPTIONS Print how the provided components can be collectively linked (`shared` or `static`). -**--src-root** - - Print the source root from which LLVM was built. - **--system-libs** Print all the system libraries needed to link against the specified LLVM diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 7315271b7fe37..50961ceabb863 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -227,6 +227,10 @@ what to add to your calendar invite. - Every week on Thursday - - `Meeting details/agenda `__ + * - SYCL Upstream Working Group + - Every 2 weeks on Mondays + - `gcal `__ + - `Meeting details/agenda `__ .. _office-hours: @@ -314,6 +318,11 @@ don't find anyone present, chances are they happen to be off that day. `gcal `__ - `Google meet `__ - English, French + * - Paulo Matos (he/him) + - WebAssembly backend; LLVM IR; + - Monthly, 1st Monday of the month at 11:00am Europe/Berlin, for 30 minutes. 
+ - `Igalia Jitsi `__ + - English, Portuguese, German Guidance for office hours hosts diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst index 904ba71b965d3..c64b5d1d0d98b 100644 --- a/llvm/docs/HowToUpdateDebugInfo.rst +++ b/llvm/docs/HowToUpdateDebugInfo.rst @@ -217,6 +217,15 @@ Deleting a MIR-level MachineInstr TODO +Rules for updating ``DIAssignID`` Attachments +============================================= + +``DIAssignID`` metadata attachments are used by Assignment Tracking, which is +currently an experimental debug mode. + +See :doc:`AssignmentTracking` for how to update them and for more info on +Assignment Tracking. + How to automatically convert tests into debug info tests ======================================================== diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3a809603ddcc4..0006dab9b2d29 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -886,8 +886,9 @@ Syntax:: [, partition "name"] The linkage must be one of ``private``, ``internal``, ``linkonce``, ``weak``, -``linkonce_odr``, ``weak_odr``, ``external``. Note that some system linkers -might not correctly handle dropping a weak symbol that is aliased. +``linkonce_odr``, ``weak_odr``, ``external``, ``available_externally``. Note +that some system linkers might not correctly handle dropping a weak symbol that +is aliased. Aliases that are not ``unnamed_addr`` are guaranteed to have the same address as the aliasee expression. ``unnamed_addr`` ones are only guaranteed to point @@ -906,8 +907,10 @@ some can only be checked when producing an object file: intermediate alias being overridden cannot be represented in an object file. -* No global value in the expression can be a declaration, since that - would require a relocation, which is not possible. 
+* If the alias has the ``available_externally`` linkage, the aliasee must be an + ``available_externally`` global value; otherwise the aliasee can be an + expression but no global value in the expression can be a declaration, since + that would require a relocation, which is not possible. * If either the alias or the aliasee may be replaced by a symbol outside the module at link time or runtime, any optimization cannot replace the alias with @@ -1414,6 +1417,30 @@ Currently, only the following parameter attributes are defined: same address may be returned), for a free-like function the pointer will always be invalidated. +``readnone`` + This attribute indicates that the function does not dereference that + pointer argument, even though it may read or write the memory that the + pointer points to if accessed through other pointers. + + If a function reads from or writes to a readnone pointer argument, the + behavior is undefined. + +``readonly`` + This attribute indicates that the function does not write through this + pointer argument, even though it may write to the memory that the pointer + points to. + + If a function writes to a readonly pointer argument, the behavior is + undefined. + +``writeonly`` + This attribute indicates that the function may write to, but does not read + through this pointer argument (even though it may read from the memory that + the pointer points to). + + If a function reads from a writeonly pointer argument, the behavior is + undefined. + .. _gc: Garbage Collector Strategy Names @@ -1701,22 +1728,6 @@ example: the profile information. By marking a function ``hot``, users can work around the cases where the training input does not have good coverage on all the hot functions. -``inaccessiblememonly`` - This attribute indicates that the function may only access memory that - is not accessible by the module being compiled before return from the - function. This is a weaker form of ``readnone``. 
If the function reads - or writes other memory, the behavior is undefined. - - For clarity, note that such functions are allowed to return new memory - which is ``noalias`` with respect to memory already accessible from - the module. That is, a function can be both ``inaccessiblememonly`` and - have a ``noalias`` return which introduces a new, potentially initialized, - allocation. -``inaccessiblemem_or_argmemonly`` - This attribute indicates that the function may only access memory that is - either not accessible by the module being compiled, or is pointed to - by its pointer arguments. This is a weaker form of ``argmemonly``. If the - function reads or writes other memory, the behavior is undefined. ``inlinehint`` This attribute indicates that the source code contained a hint that inlining this function is desirable (such as the "inline" keyword in @@ -1974,45 +1985,6 @@ example: function that has a ``"probe-stack"`` attribute is inlined into a function that has no ``"probe-stack"`` attribute at all, the resulting function has the ``"probe-stack"`` attribute of the callee. -``readnone`` - On a function, this attribute indicates that the function computes its - result (or decides to unwind an exception) based strictly on its arguments, - without dereferencing any pointer arguments or otherwise accessing - any mutable state (e.g. memory, control registers, etc) visible outside the - ``readnone`` function. It does not write through any pointer arguments - (including ``byval`` arguments) and never changes any state visible to - callers. This means while it cannot unwind exceptions by calling the ``C++`` - exception throwing methods (since they write to memory), there may be - non-``C++`` mechanisms that throw exceptions without writing to LLVM visible - memory. 
- - On an argument, this attribute indicates that the function does not - dereference that pointer argument, even though it may read or write the - memory that the pointer points to if accessed through other pointers. - - If a readnone function reads or writes memory visible outside the function, - or has other side-effects, the behavior is undefined. If a - function reads from or writes to a readnone pointer argument, the behavior - is undefined. -``readonly`` - On a function, this attribute indicates that the function does not write - through any pointer arguments (including ``byval`` arguments) or otherwise - modify any state (e.g. memory, control registers, etc) visible outside the - ``readonly`` function. It may dereference pointer arguments and read - state that may be set in the caller. A readonly function always - returns the same value (or unwinds an exception identically) when - called with the same set of arguments and global state. This means while it - cannot unwind exceptions by calling the ``C++`` exception throwing methods - (since they write to memory), there may be non-``C++`` mechanisms that throw - exceptions without writing to LLVM visible memory. - - On an argument, this attribute indicates that the function does not write - through this pointer argument, even though it may write to the memory that - the pointer points to. - - If a readonly function writes memory visible outside the function, or has - other side-effects, the behavior is undefined. If a function writes to a - readonly pointer argument, the behavior is undefined. ``"stack-probe-size"`` This attribute controls the behavior of stack probes: either the ``"probe-stack"`` attribute, or ABI-required stack probes, if any. @@ -2030,29 +2002,6 @@ example: of the callee. ``"no-stack-arg-probe"`` This attribute disables ABI-required stack probes, if any. 
-``writeonly`` - On a function, this attribute indicates that the function may write to but - does not read from memory visible outside the ``writeonly`` function. - - On an argument, this attribute indicates that the function may write to but - does not read through this pointer argument (even though it may read from - the memory that the pointer points to). - - If a writeonly function reads memory visible outside the function or has - other side-effects, the behavior is undefined. If a function reads - from a writeonly pointer argument, the behavior is undefined. -``argmemonly`` - This attribute indicates that the only memory accesses inside function are - loads and stores from objects pointed to by its pointer-typed arguments, - with arbitrary offsets. Or in other words, all memory operations in the - function can refer to memory only using pointers based on its function - arguments. - - Note that ``argmemonly`` can be used together with ``readonly`` attribute - in order to specify that function reads only from its arguments. - - If an argmemonly function reads or writes memory other than the pointer - arguments, or has other side-effects, the behavior is undefined. ``returns_twice`` This attribute indicates that this function can return twice. The C ``setjmp`` is an example of such a function. The compiler disables @@ -3648,7 +3597,7 @@ The semantics of non-zero address spaces are target-specific. Memory access through a non-dereferenceable pointer is undefined behavior in any address space. Pointers with the bit-value 0 are only assumed to be non-dereferenceable in address space 0, unless the function is -marked with the ``null_pointer_is_valid attribute``. +marked with the ``null_pointer_is_valid`` attribute. 
If an object can be proven accessible through a pointer with a different address space, the access may be modified to use that diff --git a/llvm/docs/NewPassManager.rst b/llvm/docs/NewPassManager.rst index 9390afa300ca5..3d0bcdeb4ccb7 100644 --- a/llvm/docs/NewPassManager.rst +++ b/llvm/docs/NewPassManager.rst @@ -179,6 +179,11 @@ sanitizer) passes to various parts of the pipeline. ``AMDGPUTargetMachine::registerPassBuilderCallbacks()`` is an example of a backend adding passes to various parts of the pipeline. +Pass plugins can also add passes into default pipelines. Different tools have +different ways of loading dynamic pass plugins. For example, ``opt +-load-pass-plugin=path/to/plugin.so`` loads a pass plugin into ``opt``. For +information on writing a pass plugin, see :doc:`WritingAnLLVMNewPMPass`. + Using Analyses ============== @@ -430,6 +435,8 @@ To use the new PM: .. code-block:: shell $ opt -passes='pass1,pass2' /tmp/a.ll -S + # -p is an alias for -passes + $ opt -p pass1,pass2 /tmp/a.ll -S The new PM typically requires explicit pass nesting. For example, to run a function pass, then a module pass, we need to wrap the function pass in a module diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 6f0a64fd43468..465fdc329647d 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -67,6 +67,27 @@ and there is no way to suppress this error. Changes to the LLVM IR ---------------------- +* The ``readnone``, ``readonly``, ``writeonly``, ``argmemonly``, + ``inaccessiblememonly`` and ``inaccessiblemem_or_argmemonly`` function + attributes have been replaced by a single ``memory(...)`` attribute. 
The + old attributes may be mapped to the new one as follows: + + * ``readnone`` -> ``memory(none)`` + * ``readonly`` -> ``memory(read)`` + * ``writeonly`` -> ``memory(write)`` + * ``argmemonly`` -> ``memory(argmem: readwrite)`` + * ``argmemonly readonly`` -> ``memory(argmem: read)`` + * ``argmemonly writeonly`` -> ``memory(argmem: write)`` + * ``inaccessiblememonly`` -> ``memory(inaccessiblemem: readwrite)`` + * ``inaccessiblememonly readonly`` -> ``memory(inaccessiblemem: read)`` + * ``inaccessiblememonly writeonly`` -> ``memory(inaccessiblemem: write)`` + * ``inaccessiblemem_or_argmemonly`` -> + ``memory(argmem: readwrite, inaccessiblemem: readwrite)`` + * ``inaccessiblemem_or_argmemonly readonly`` -> + ``memory(argmem: read, inaccessiblemem: read)`` + * ``inaccessiblemem_or_argmemonly writeonly`` -> + ``memory(argmem: write, inaccessiblemem: write)`` + * The constant expression variants of the following instructions has been removed: @@ -81,6 +102,8 @@ Changes to TableGen Changes to the AArch64 Backend ------------------------------ +* Added support for the Cortex-A715 CPU. + Changes to the AMDGPU Backend ----------------------------- @@ -146,6 +169,7 @@ Changes to the X86 Backend * Support ISA of ``AVX-IFMA``. * Support ISA of ``AVX-VNNI-INT8``. * Support ISA of ``AVX-NE-CONVERT``. +* ``-mcpu=raptorlake`` and ``-mcpu=meteorlake`` are now supported. Changes to the OCaml bindings ----------------------------- diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.rst index a9ce60029b448..07468b1e75f17 100644 --- a/llvm/docs/SourceLevelDebugging.rst +++ b/llvm/docs/SourceLevelDebugging.rst @@ -251,6 +251,40 @@ directly, not its address. Note that the value operand of this intrinsic may be indirect (i.e, a pointer to the source variable), provided that interpreting the complex expression derives the direct value. +``llvm.dbg.assign`` +^^^^^^^^^^^^^^^^^^^ +.. toctree:: + :hidden: + + AssignmentTracking + +.. 
code-block:: llvm + + void @llvm.dbg.assign(Value *Value, + DIExpression *ValueExpression, + DILocalVariable *Variable, + DIAssignID *ID, + Value *Address, + DIExpression *AddressExpression) + +This intrinsic marks the position in IR where a source assignment occurred. It +encodes the value of the variable. It references the store, if any, that +performs the assignment, and the destination address. + +The first three arguments are the same as for an ``llvm.dbg.value``. The fourth +argument is a ``DIAssignID`` used to reference a store. The fifth is the +destination of the store (wrapped as metadata), and the sixth is a `complex +expression `_ that modifies it. + +The formal LLVM-IR signature is: + +.. code-block:: llvm + + void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + + +See :doc:`AssignmentTracking` for more info. + Object lifetimes and scoping ============================ diff --git a/llvm/docs/WritingAnLLVMNewPMPass.rst b/llvm/docs/WritingAnLLVMNewPMPass.rst index dbedc4bb6be9b..799863c7864ec 100644 --- a/llvm/docs/WritingAnLLVMNewPMPass.rst +++ b/llvm/docs/WritingAnLLVMNewPMPass.rst @@ -232,3 +232,58 @@ function. Required passes will still be run on ``optnone`` functions. For more implementation details, see ``PassInstrumentation::runBeforePass()``. + +Registering passes as plugins +----------------------------- + +LLVM provides a mechanism to register pass plugins within various tools like +``clang`` or ``opt``. A pass plugin can add passes to default optimization +pipelines or to be manually run via tools like ``opt``. For more information, +see :doc:`NewPassManager`. + +Create a CMake project at the root of the repo alongside +other projects. This project must contain the following minimal +``CMakeLists.txt``: + +.. code-block:: cmake + + add_llvm_pass_plugin(MyPassName source.cpp) + +See the definition of ``add_llvm_pass_plugin`` for more CMake details.
+ +The pass must provide at least one of two entry points for the new pass manager, +one for static registration and one for dynamically loaded plugins: + +- ``llvm::PassPluginLibraryInfo get##Name##PluginInfo();`` +- ``extern "C" ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() LLVM_ATTRIBUTE_WEAK;`` + +Pass plugins are compiled and linked dynamically by default. Setting +``LLVM_${NAME}_LINK_INTO_TOOLS`` to ``ON`` turns the project into a statically +linked extension. + +For an in-tree example, see ``llvm/examples/Bye/``. + +To make ``PassBuilder`` aware of statically linked pass plugins: + +.. code-block:: c++ + + // Declare plugin extension function declarations. + #define HANDLE_EXTENSION(Ext) llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); + #include "llvm/Support/Extension.def" + + ... + + // Register plugin extensions in PassBuilder. + #define HANDLE_EXTENSION(Ext) get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); + #include "llvm/Support/Extension.def" + +To make ``PassBuilder`` aware of dynamically linked pass plugins: + +.. code-block:: c++ + + // Load plugin dynamically. + auto Plugin = PassPlugin::Load(PathToPlugin); + if (!Plugin) + report_error(); + // Register plugin extensions in PassBuilder. + Plugin.registerPassBuilderCallbacks(PB); diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.rst index b644def6cbeb5..2b2ac719ef589 100644 --- a/llvm/docs/WritingAnLLVMPass.rst +++ b/llvm/docs/WritingAnLLVMPass.rst @@ -1183,51 +1183,6 @@ implement ``releaseMemory`` to, well, release the memory allocated to maintain this internal state. This method is called after the ``run*`` method for the class, before the next call of ``run*`` in your pass. -Building pass plugins -===================== - -As an alternative to using ``PLUGIN_TOOL``, LLVM provides a mechanism to -automatically register pass plugins within ``clang``, ``opt`` and ``bugpoint``. 
-One first needs to create an independent project and add it to either ``tools/`` -or, using the MonoRepo layout, at the root of the repo alongside other projects. -This project must contain the following minimal ``CMakeLists.txt``: - -.. code-block:: cmake - - add_llvm_pass_plugin(Name source0.cpp) - -The pass must provide two entry points for the new pass manager, one for static -registration and one for dynamically loaded plugins: - -- ``llvm::PassPluginLibraryInfo get##Name##PluginInfo();`` -- ``extern "C" ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() LLVM_ATTRIBUTE_WEAK;`` - -Pass plugins are compiled and link dynamically by default, but it's -possible to set the following variables to change this behavior: - -- ``LLVM_${NAME}_LINK_INTO_TOOLS``, when set to ``ON``, turns the project into - a statically linked extension - - -When building a tool that uses the new pass manager, one can use the following snippet to -include statically linked pass plugins: - -.. code-block:: c++ - - // fetch the declaration - #define HANDLE_EXTENSION(Ext) llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); - #include "llvm/Support/Extension.def" - - [...] 
- - // use them, PB is an llvm::PassBuilder instance - #define HANDLE_EXTENSION(Ext) get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); - #include "llvm/Support/Extension.def" - - - - - Registering dynamically loaded passes ===================================== diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 8554a01998736..ef6a147eb2a52 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -169,7 +169,8 @@ enum { LLVMDICommonBlockMetadataKind, LLVMDIStringTypeMetadataKind, LLVMDIGenericSubrangeMetadataKind, - LLVMDIArgListMetadataKind + LLVMDIArgListMetadataKind, + LLVMDIAssignIDMetadataKind, }; typedef unsigned LLVMMetadataKind; diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index 88a6fa9205983..1153352d8b24f 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -125,8 +125,8 @@ template bool canTypeFitValue(const U Value) { // - its internal representation overflows. struct CheckedInt { // Integral constructor, asserts if Value cannot be represented as intmax_t. - template ::value, bool> = 0> + template ::value, bool> = 0> static CheckedInt from(Integral FromValue) { if (!canTypeFitValue(FromValue)) assertOutOfBounds(); @@ -137,7 +137,7 @@ struct CheckedInt { // Enum constructor, asserts if Value cannot be represented as intmax_t. template ::value, bool> = 0> + std::enable_if_t::value, bool> = 0> static CheckedInt from(Enum FromValue) { using type = std::underlying_type_t; return from(static_cast(FromValue)); @@ -162,8 +162,8 @@ struct CheckedInt { } // Convert to integral, asserts if Value cannot be represented as Integral. - template ::value, bool> = 0> + template ::value, bool> = 0> Integral to() const { if (!canTypeFitValue(Value)) assertOutOfBounds(); @@ -173,7 +173,7 @@ struct CheckedInt { // Convert to enum, asserts if Value cannot be represented as Enum's // underlying type. 
template ::value, bool> = 0> + std::enable_if_t::value, bool> = 0> Enum to() const { using type = std::underlying_type_t; return Enum(to()); diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index 00e8d7d7dd4de..7535464e07100 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -69,7 +69,10 @@ class MLInlineAdvisor : public InlineAdvisor { getSkipAdviceIfUnreachableCallsite(CallBase &CB); void print(raw_ostream &OS) const override; - mutable DenseMap FPICache; + // Using std::map to benefit from its iterator / reference non-invalidating + // semantics, which make it easy to use `getCachedFPI` results from multiple + // calls without needing to copy to avoid invalidation effects. + mutable std::map FPICache; LazyCallGraph &CG; diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index bc6b279e9ed52..8bf6b2a095f6c 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -452,37 +452,8 @@ class ValueLatticeElement { /// true, false or undef constants, or nullptr if the comparison cannot be /// evaluated. Constant *getCompare(CmpInst::Predicate Pred, Type *Ty, - const ValueLatticeElement &Other) const { - if (isUnknownOrUndef() || Other.isUnknownOrUndef()) - return UndefValue::get(Ty); - - if (isConstant() && Other.isConstant()) - return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant()); - - if (ICmpInst::isEquality(Pred)) { - // not(C) != C => true, not(C) == C => false. - if ((isNotConstant() && Other.isConstant() && - getNotConstant() == Other.getConstant()) || - (isConstant() && Other.isNotConstant() && - getConstant() == Other.getNotConstant())) - return Pred == ICmpInst::ICMP_NE - ? ConstantInt::getTrue(Ty) : ConstantInt::getFalse(Ty); - } - - // Integer constants are represented as ConstantRanges with single - // elements. 
- if (!isConstantRange() || !Other.isConstantRange()) - return nullptr; - - const auto &CR = getConstantRange(); - const auto &OtherCR = Other.getConstantRange(); - if (CR.icmp(Pred, OtherCR)) - return ConstantInt::getTrue(Ty); - if (CR.icmp(CmpInst::getInversePredicate(Pred), OtherCR)) - return ConstantInt::getFalse(Ty); - - return nullptr; - } + const ValueLatticeElement &Other, + const DataLayout &DL) const; unsigned getNumRangeExtensions() const { return NumRangeExtensions; } void setNumRangeExtensions(unsigned N) { NumRangeExtensions = N; } diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 87df754968265..5fc192f145aaf 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -190,6 +190,11 @@ enum Kind { kw_argmem, kw_inaccessiblemem, + // Legacy memory attributes: + kw_argmemonly, + kw_inaccessiblememonly, + kw_inaccessiblemem_or_argmemonly, + kw_type, kw_opaque, diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index ee5669c6c6aa8..74a51d5ce6907 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -349,7 +349,8 @@ enum MetadataCodes { // info. METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...] METADATA_GENERIC_SUBRANGE = 45, // [distinct, count, lo, up, stride] - METADATA_ARG_LIST = 46 // [n x [type num, value num]] + METADATA_ARG_LIST = 46, // [n x [type num, value num]] + METADATA_ASSIGN_ID = 47, // [distinct, ...] 
}; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 515382a8e869e..5c54f0e8ab058 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -696,6 +696,9 @@ class CombinerHelper { /// (fma fneg(x), fneg(y), z) -> (fma x, y, z) bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo); + void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo); + bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate = false); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 3a53017a4e1c9..1921dcff4a60c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -724,7 +724,10 @@ class LegalizationArtifactCombiner { /// and its callees rely upon. Register findValueFromDefImpl(Register DefReg, unsigned StartBit, unsigned Size) { - MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI); + Optional DefSrcReg = + getDefSrcRegIgnoringCopies(DefReg, MRI); + MachineInstr *Def = DefSrcReg->MI; + DefReg = DefSrcReg->Reg; // If the instruction has a single def, then simply delegate the search. // For unmerge however with multiple defs, we need to compute the offset // into the source of the unmerge. 
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 40663f95fa0a8..189db64609a48 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1148,8 +1148,8 @@ class TargetInstrInfo : public MCInstrInfo { /// Return true if target supports reassociation of instructions in machine /// combiner pass to reduce register pressure for a given BB. virtual bool - shouldReduceRegisterPressure(MachineBasicBlock *MBB, - RegisterClassInfo *RegClassInfo) const { + shouldReduceRegisterPressure(const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo) const { return false; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index a76fb97a14dc5..d0a7375c6a3b8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2000,6 +2000,14 @@ class TargetLoweringBase { llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); } + /// Perform a atomicrmw expansion using a target-specific way. This is + /// expected to be called when masked atomicrmw and bit test atomicrmw don't + /// work, and the target supports another way to lower atomicrmw. + virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const { + llvm_unreachable( + "Generic atomicrmw expansion unimplemented on this target"); + } + /// Perform a bit test atomicrmw using a target-specific intrinsic. This /// represents the combined bit test intrinsic which will be lowered at a late /// stage by the backend. 
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 8ed27f1a0c568..9fc145551880f 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -47,186 +47,187 @@ def v128i1 : ValueType<128, 24>; // 128 x i1 vector value def v256i1 : ValueType<256, 25>; // 256 x i1 vector value def v512i1 : ValueType<512, 26>; // 512 x i1 vector value def v1024i1 : ValueType<1024, 27>; // 1024 x i1 vector value - -def v128i2 : ValueType<256, 28>; // 128 x i2 vector value -def v256i2 : ValueType<512, 29>; // 256 x i2 vector value - -def v64i4 : ValueType<256, 30>; // 64 x i4 vector value -def v128i4 : ValueType<512, 31>; // 128 x i4 vector value - -def v1i8 : ValueType<8, 32>; // 1 x i8 vector value -def v2i8 : ValueType<16, 33>; // 2 x i8 vector value -def v4i8 : ValueType<32, 34>; // 4 x i8 vector value -def v8i8 : ValueType<64, 35>; // 8 x i8 vector value -def v16i8 : ValueType<128, 36>; // 16 x i8 vector value -def v32i8 : ValueType<256, 37>; // 32 x i8 vector value -def v64i8 : ValueType<512, 38>; // 64 x i8 vector value -def v128i8 : ValueType<1024, 39>; // 128 x i8 vector value -def v256i8 : ValueType<2048, 40>; // 256 x i8 vector value -def v512i8 : ValueType<4096, 41>; // 512 x i8 vector value -def v1024i8 : ValueType<8192, 42>; // 1024 x i8 vector value - -def v1i16 : ValueType<16, 43>; // 1 x i16 vector value -def v2i16 : ValueType<32, 44>; // 2 x i16 vector value -def v3i16 : ValueType<48, 45>; // 3 x i16 vector value -def v4i16 : ValueType<64, 46>; // 4 x i16 vector value -def v8i16 : ValueType<128, 47>; // 8 x i16 vector value -def v16i16 : ValueType<256, 48>; // 16 x i16 vector value -def v32i16 : ValueType<512, 49>; // 32 x i16 vector value -def v64i16 : ValueType<1024, 50>; // 64 x i16 vector value -def v128i16 : ValueType<2048, 51>; // 128 x i16 vector value -def v256i16 : ValueType<4096, 52>; // 256 x i16 vector value -def v512i16 : ValueType<8192, 53>; // 512 x i16 vector 
value - -def v1i32 : ValueType<32, 54>; // 1 x i32 vector value -def v2i32 : ValueType<64, 55>; // 2 x i32 vector value -def v3i32 : ValueType<96, 56>; // 3 x i32 vector value -def v4i32 : ValueType<128, 57>; // 4 x i32 vector value -def v5i32 : ValueType<160, 58>; // 5 x i32 vector value -def v6i32 : ValueType<192, 59>; // 6 x f32 vector value -def v7i32 : ValueType<224, 60>; // 7 x f32 vector value -def v8i32 : ValueType<256, 61>; // 8 x i32 vector value -def v16i32 : ValueType<512, 62>; // 16 x i32 vector value -def v32i32 : ValueType<1024, 63>; // 32 x i32 vector value -def v64i32 : ValueType<2048, 64>; // 64 x i32 vector value -def v128i32 : ValueType<4096, 65>; // 128 x i32 vector value -def v256i32 : ValueType<8192, 66>; // 256 x i32 vector value -def v512i32 : ValueType<16384, 67>; // 512 x i32 vector value -def v1024i32 : ValueType<32768, 68>; // 1024 x i32 vector value -def v2048i32 : ValueType<65536, 69>; // 2048 x i32 vector value - -def v1i64 : ValueType<64, 70>; // 1 x i64 vector value -def v2i64 : ValueType<128, 71>; // 2 x i64 vector value -def v3i64 : ValueType<192, 72>; // 3 x i64 vector value -def v4i64 : ValueType<256, 73>; // 4 x i64 vector value -def v8i64 : ValueType<512, 74>; // 8 x i64 vector value -def v16i64 : ValueType<1024, 75>; // 16 x i64 vector value -def v32i64 : ValueType<2048, 76>; // 32 x i64 vector value -def v64i64 : ValueType<4096, 77>; // 64 x i64 vector value -def v128i64 : ValueType<8192, 78>; // 128 x i64 vector value -def v256i64 : ValueType<16384, 79>; // 256 x i64 vector value - -def v1i128 : ValueType<128, 80>; // 1 x i128 vector value - -def v1f16 : ValueType<16, 81>; // 1 x f16 vector value -def v2f16 : ValueType<32, 82>; // 2 x f16 vector value -def v3f16 : ValueType<48, 83>; // 3 x f16 vector value -def v4f16 : ValueType<64, 84>; // 4 x f16 vector value -def v8f16 : ValueType<128, 85>; // 8 x f16 vector value -def v16f16 : ValueType<256, 86>; // 16 x f16 vector value -def v32f16 : ValueType<512, 87>; // 32 x f16 
vector value -def v64f16 : ValueType<1024, 88>; // 64 x f16 vector value -def v128f16 : ValueType<2048, 89>; // 128 x f16 vector value -def v256f16 : ValueType<4096, 90>; // 256 x f16 vector value -def v512f16 : ValueType<8192, 91>; // 512 x f16 vector value - -def v2bf16 : ValueType<32, 92>; // 2 x bf16 vector value -def v3bf16 : ValueType<48, 93>; // 3 x bf16 vector value -def v4bf16 : ValueType<64, 94>; // 4 x bf16 vector value -def v8bf16 : ValueType<128, 95>; // 8 x bf16 vector value -def v16bf16 : ValueType<256, 96>; // 16 x bf16 vector value -def v32bf16 : ValueType<512, 97>; // 32 x bf16 vector value -def v64bf16 : ValueType<1024, 98>; // 64 x bf16 vector value -def v128bf16 : ValueType<2048, 99>; // 128 x bf16 vector value - -def v1f32 : ValueType<32, 100>; // 1 x f32 vector value -def v2f32 : ValueType<64, 101>; // 2 x f32 vector value -def v3f32 : ValueType<96, 102>; // 3 x f32 vector value -def v4f32 : ValueType<128, 103>; // 4 x f32 vector value -def v5f32 : ValueType<160, 104>; // 5 x f32 vector value -def v6f32 : ValueType<192, 105>; // 6 x f32 vector value -def v7f32 : ValueType<224, 106>; // 7 x f32 vector value -def v8f32 : ValueType<256, 107>; // 8 x f32 vector value -def v16f32 : ValueType<512, 108>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 109>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 110>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 111>; // 128 x f32 vector value -def v256f32 : ValueType<8192, 112>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 113>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 114>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 115>; // 2048 x f32 vector value - -def v1f64 : ValueType<64, 116>; // 1 x f64 vector value -def v2f64 : ValueType<128, 117>; // 2 x f64 vector value -def v3f64 : ValueType<192, 118>; // 3 x f64 vector value -def v4f64 : ValueType<256, 119>; // 4 x f64 vector value -def v8f64 : ValueType<512, 120>; // 8 x f64 vector value 
-def v16f64 : ValueType<1024, 121>; // 16 x f64 vector value -def v32f64 : ValueType<2048, 122>; // 32 x f64 vector value -def v64f64 : ValueType<4096, 123>; // 64 x f64 vector value -def v128f64 : ValueType<8192, 124>; // 128 x f64 vector value -def v256f64 : ValueType<16384, 125>; // 256 x f64 vector value - -def nxv1i1 : ValueType<1, 126>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 127>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 128>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 129>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 130>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 131>; // n x 32 x i1 vector value -def nxv64i1 : ValueType<64, 132>; // n x 64 x i1 vector value - -def nxv1i8 : ValueType<8, 133>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 134>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 135>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 136>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 137>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 138>; // n x 32 x i8 vector value -def nxv64i8 : ValueType<512, 139>; // n x 64 x i8 vector value - -def nxv1i16 : ValueType<16, 140>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 141>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 142>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 143>; // n x 8 x i16 vector value -def nxv16i16 : ValueType<256, 144>; // n x 16 x i16 vector value -def nxv32i16 : ValueType<512, 145>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 146>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 147>; // n x 2 x i32 vector value -def nxv4i32 : ValueType<128, 148>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 149>; // n x 8 x i32 vector value -def nxv16i32 : ValueType<512, 150>; // n x 16 x i32 vector value -def nxv32i32 : ValueType<1024, 151>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 152>; // n x 1 
x i64 vector value -def nxv2i64 : ValueType<128, 153>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 154>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 155>; // n x 8 x i64 vector value -def nxv16i64 : ValueType<1024, 156>; // n x 16 x i64 vector value -def nxv32i64 : ValueType<2048, 157>; // n x 32 x i64 vector value - -def nxv1f16 : ValueType<16, 158>; // n x 1 x f16 vector value -def nxv2f16 : ValueType<32, 159>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64, 160>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 161>; // n x 8 x f16 vector value -def nxv16f16 : ValueType<256, 162>; // n x 16 x f16 vector value -def nxv32f16 : ValueType<512, 163>; // n x 32 x f16 vector value - -def nxv1bf16 : ValueType<16, 164>; // n x 1 x bf16 vector value -def nxv2bf16 : ValueType<32, 165>; // n x 2 x bf16 vector value -def nxv4bf16 : ValueType<64, 166>; // n x 4 x bf16 vector value -def nxv8bf16 : ValueType<128, 167>; // n x 8 x bf16 vector value -def nxv16bf16 : ValueType<256, 168>; // n x 16 x bf16 vector value -def nxv32bf16 : ValueType<512, 169>; // n x 32 x bf16 vector value - -def nxv1f32 : ValueType<32, 170>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64, 171>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 172>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 173>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 174>; // n x 16 x f32 vector value - -def nxv1f64 : ValueType<64, 175>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 176>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 177>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 178>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64, 179>; // X86 MMX value -def FlagVT : ValueType<0, 180>; // Pre-RA sched glue -def isVoid : ValueType<0, 181>; // Produces no value -def untyped : ValueType<8, 182>; // Produces an untyped value -def funcref : ValueType<0, 183>; // WebAssembly's funcref type 
-def externref : ValueType<0, 184>; // WebAssembly's externref type -def x86amx : ValueType<8192, 185>; // X86 AMX value -def i64x8 : ValueType<512, 186>; // 8 Consecutive GPRs (AArch64) +def v2048i1 : ValueType<2048, 28>; // 2048 x i1 vector value + +def v128i2 : ValueType<256, 29>; // 128 x i2 vector value +def v256i2 : ValueType<512, 30>; // 256 x i2 vector value + +def v64i4 : ValueType<256, 31>; // 64 x i4 vector value +def v128i4 : ValueType<512, 32>; // 128 x i4 vector value + +def v1i8 : ValueType<8, 33>; // 1 x i8 vector value +def v2i8 : ValueType<16, 34>; // 2 x i8 vector value +def v4i8 : ValueType<32, 35>; // 4 x i8 vector value +def v8i8 : ValueType<64, 36>; // 8 x i8 vector value +def v16i8 : ValueType<128, 37>; // 16 x i8 vector value +def v32i8 : ValueType<256, 38>; // 32 x i8 vector value +def v64i8 : ValueType<512, 39>; // 64 x i8 vector value +def v128i8 : ValueType<1024, 40>; // 128 x i8 vector value +def v256i8 : ValueType<2048, 41>; // 256 x i8 vector value +def v512i8 : ValueType<4096, 42>; // 512 x i8 vector value +def v1024i8 : ValueType<8192, 43>; // 1024 x i8 vector value + +def v1i16 : ValueType<16, 44>; // 1 x i16 vector value +def v2i16 : ValueType<32, 45>; // 2 x i16 vector value +def v3i16 : ValueType<48, 46>; // 3 x i16 vector value +def v4i16 : ValueType<64, 47>; // 4 x i16 vector value +def v8i16 : ValueType<128, 48>; // 8 x i16 vector value +def v16i16 : ValueType<256, 49>; // 16 x i16 vector value +def v32i16 : ValueType<512, 50>; // 32 x i16 vector value +def v64i16 : ValueType<1024, 51>; // 64 x i16 vector value +def v128i16 : ValueType<2048, 52>; // 128 x i16 vector value +def v256i16 : ValueType<4096, 53>; // 256 x i16 vector value +def v512i16 : ValueType<8192, 54>; // 512 x i16 vector value + +def v1i32 : ValueType<32, 55>; // 1 x i32 vector value +def v2i32 : ValueType<64, 56>; // 2 x i32 vector value +def v3i32 : ValueType<96, 57>; // 3 x i32 vector value +def v4i32 : ValueType<128, 58>; // 4 x i32 vector value +def 
v5i32 : ValueType<160, 59>; // 5 x i32 vector value +def v6i32 : ValueType<192, 60>; // 6 x i32 vector value +def v7i32 : ValueType<224, 61>; // 7 x i32 vector value +def v8i32 : ValueType<256, 62>; // 8 x i32 vector value +def v16i32 : ValueType<512, 63>; // 16 x i32 vector value +def v32i32 : ValueType<1024, 64>; // 32 x i32 vector value +def v64i32 : ValueType<2048, 65>; // 64 x i32 vector value +def v128i32 : ValueType<4096, 66>; // 128 x i32 vector value +def v256i32 : ValueType<8192, 67>; // 256 x i32 vector value +def v512i32 : ValueType<16384, 68>; // 512 x i32 vector value +def v1024i32 : ValueType<32768, 69>; // 1024 x i32 vector value +def v2048i32 : ValueType<65536, 70>; // 2048 x i32 vector value + +def v1i64 : ValueType<64, 71>; // 1 x i64 vector value +def v2i64 : ValueType<128, 72>; // 2 x i64 vector value +def v3i64 : ValueType<192, 73>; // 3 x i64 vector value +def v4i64 : ValueType<256, 74>; // 4 x i64 vector value +def v8i64 : ValueType<512, 75>; // 8 x i64 vector value +def v16i64 : ValueType<1024, 76>; // 16 x i64 vector value +def v32i64 : ValueType<2048, 77>; // 32 x i64 vector value +def v64i64 : ValueType<4096, 78>; // 64 x i64 vector value +def v128i64 : ValueType<8192, 79>; // 128 x i64 vector value +def v256i64 : ValueType<16384, 80>; // 256 x i64 vector value + +def v1i128 : ValueType<128, 81>; // 1 x i128 vector value + +def v1f16 : ValueType<16, 82>; // 1 x f16 vector value +def v2f16 : ValueType<32, 83>; // 2 x f16 vector value +def v3f16 : ValueType<48, 84>; // 3 x f16 vector value +def v4f16 : ValueType<64, 85>; // 4 x f16 vector value +def v8f16 : ValueType<128, 86>; // 8 x f16 vector value +def v16f16 : ValueType<256, 87>; // 16 x f16 vector value +def v32f16 : ValueType<512, 88>; // 32 x f16 vector value +def v64f16 : ValueType<1024, 89>; // 64 x f16 vector value +def v128f16 : ValueType<2048, 90>; // 128 x f16 vector value +def v256f16 : ValueType<4096, 91>; // 256 x f16 vector value +def v512f16 : ValueType<8192, 92>; // 512
x f16 vector value + +def v2bf16 : ValueType<32, 93>; // 2 x bf16 vector value +def v3bf16 : ValueType<48, 94>; // 3 x bf16 vector value +def v4bf16 : ValueType<64, 95>; // 4 x bf16 vector value +def v8bf16 : ValueType<128, 96>; // 8 x bf16 vector value +def v16bf16 : ValueType<256, 97>; // 16 x bf16 vector value +def v32bf16 : ValueType<512, 98>; // 32 x bf16 vector value +def v64bf16 : ValueType<1024, 99>; // 64 x bf16 vector value +def v128bf16 : ValueType<2048, 100>; // 128 x bf16 vector value + +def v1f32 : ValueType<32, 101>; // 1 x f32 vector value +def v2f32 : ValueType<64, 102>; // 2 x f32 vector value +def v3f32 : ValueType<96, 103>; // 3 x f32 vector value +def v4f32 : ValueType<128, 104>; // 4 x f32 vector value +def v5f32 : ValueType<160, 105>; // 5 x f32 vector value +def v6f32 : ValueType<192, 106>; // 6 x f32 vector value +def v7f32 : ValueType<224, 107>; // 7 x f32 vector value +def v8f32 : ValueType<256, 108>; // 8 x f32 vector value +def v16f32 : ValueType<512, 109>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 110>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 111>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 112>; // 128 x f32 vector value +def v256f32 : ValueType<8192, 113>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 114>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 115>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 116>; // 2048 x f32 vector value + +def v1f64 : ValueType<64, 117>; // 1 x f64 vector value +def v2f64 : ValueType<128, 118>; // 2 x f64 vector value +def v3f64 : ValueType<192, 119>; // 3 x f64 vector value +def v4f64 : ValueType<256, 120>; // 4 x f64 vector value +def v8f64 : ValueType<512, 121>; // 8 x f64 vector value +def v16f64 : ValueType<1024, 122>; // 16 x f64 vector value +def v32f64 : ValueType<2048, 123>; // 32 x f64 vector value +def v64f64 : ValueType<4096, 124>; // 64 x f64 vector value +def v128f64 : ValueType<8192, 125>; // 128 x f64 
vector value +def v256f64 : ValueType<16384, 126>; // 256 x f64 vector value + +def nxv1i1 : ValueType<1, 127>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 128>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 129>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 130>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 131>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 132>; // n x 32 x i1 vector value +def nxv64i1 : ValueType<64, 133>; // n x 64 x i1 vector value + +def nxv1i8 : ValueType<8, 134>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 135>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 136>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 137>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 138>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 139>; // n x 32 x i8 vector value +def nxv64i8 : ValueType<512, 140>; // n x 64 x i8 vector value + +def nxv1i16 : ValueType<16, 141>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 142>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 143>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 144>; // n x 8 x i16 vector value +def nxv16i16 : ValueType<256, 145>; // n x 16 x i16 vector value +def nxv32i16 : ValueType<512, 146>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 147>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 148>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 149>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 150>; // n x 8 x i32 vector value +def nxv16i32 : ValueType<512, 151>; // n x 16 x i32 vector value +def nxv32i32 : ValueType<1024, 152>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 153>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 154>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 155>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 156>; // n x 8 x i64 vector value +def nxv16i64 : 
ValueType<1024, 157>; // n x 16 x i64 vector value +def nxv32i64 : ValueType<2048, 158>; // n x 32 x i64 vector value + +def nxv1f16 : ValueType<16, 159>; // n x 1 x f16 vector value +def nxv2f16 : ValueType<32, 160>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64, 161>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 162>; // n x 8 x f16 vector value +def nxv16f16 : ValueType<256, 163>; // n x 16 x f16 vector value +def nxv32f16 : ValueType<512, 164>; // n x 32 x f16 vector value + +def nxv1bf16 : ValueType<16, 165>; // n x 1 x bf16 vector value +def nxv2bf16 : ValueType<32, 166>; // n x 2 x bf16 vector value +def nxv4bf16 : ValueType<64, 167>; // n x 4 x bf16 vector value +def nxv8bf16 : ValueType<128, 168>; // n x 8 x bf16 vector value +def nxv16bf16 : ValueType<256, 169>; // n x 16 x bf16 vector value +def nxv32bf16 : ValueType<512, 170>; // n x 32 x bf16 vector value + +def nxv1f32 : ValueType<32, 171>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64, 172>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 173>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 174>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 175>; // n x 16 x f32 vector value + +def nxv1f64 : ValueType<64, 176>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 177>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 178>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 179>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64, 180>; // X86 MMX value +def FlagVT : ValueType<0, 181>; // Pre-RA sched glue +def isVoid : ValueType<0, 182>; // Produces no value +def untyped : ValueType<8, 183>; // Produces an untyped value +def funcref : ValueType<0, 184>; // WebAssembly's funcref type +def externref : ValueType<0, 185>; // WebAssembly's externref type +def x86amx : ValueType<8192, 186>; // X86 AMX value +def i64x8 : ValueType<512, 187>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // TokenTy 
def MetadataVT : ValueType<0, 249>; // Metadata diff --git a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h index 8b55a45b61e8b..60ee6493b1a1f 100644 --- a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h +++ b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h @@ -50,8 +50,6 @@ struct WasmEHFuncInfo { } void setUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) { SrcToUnwindDest[BB] = Dest; - if (!UnwindDestToSrcs.count(Dest)) - UnwindDestToSrcs[Dest] = SmallPtrSet(); UnwindDestToSrcs[Dest].insert(BB); } bool hasUnwindDest(const BasicBlock *BB) const { @@ -76,8 +74,6 @@ struct WasmEHFuncInfo { } void setUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) { SrcToUnwindDest[MBB] = Dest; - if (!UnwindDestToSrcs.count(Dest)) - UnwindDestToSrcs[Dest] = SmallPtrSet(); UnwindDestToSrcs[Dest].insert(MBB); } bool hasUnwindDest(MachineBasicBlock *MBB) const { diff --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h index 6c94961032e75..1c9f719051eca 100644 --- a/llvm/include/llvm/Debuginfod/HTTPClient.h +++ b/llvm/include/llvm/Debuginfod/HTTPClient.h @@ -27,6 +27,7 @@ enum class HTTPMethod { GET }; /// A stateless description of an outbound HTTP request. 
struct HTTPRequest { SmallString<128> Url; + SmallVector Headers; HTTPMethod Method = HTTPMethod::GET; bool FollowRedirects = true; HTTPRequest(StringRef Url); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 0cf9d97317184..6de8a3ea93f1e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -526,6 +526,7 @@ def OMP_Critical : Directive<"critical"> { } def OMP_TaskYield : Directive<"taskyield"> {} def OMP_Barrier : Directive<"barrier"> {} +def OMP_Error : Directive<"error"> {} def OMP_TaskWait : Directive<"taskwait"> { let allowedClauses = [ VersionedClause diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 8ba71d55584a2..c09306dcde086 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -24,6 +24,7 @@ namespace llvm { class CanonicalLoopInfo; +struct TargetRegionEntryInfo; class OffloadEntriesInfoManager; /// Move the instruction after an InsertPoint to the beginning of another @@ -1093,6 +1094,37 @@ class OpenMPIRBuilder { bool EmitDebug = false, bool ForEndCall = false); + /// Creates offloading entry for the provided entry ID \a ID, + /// address \a Addr, size \a Size, and flags \a Flags. + void createOffloadEntry(bool IsTargetCodegen, Constant *ID, Constant *Addr, + uint64_t Size, int32_t Flags, + GlobalValue::LinkageTypes); + + /// The kind of errors that can occur when emitting the offload entries and + /// metadata. + enum EmitMetadataErrorKind { + EMIT_MD_TARGET_REGION_ERROR, + EMIT_MD_DECLARE_TARGET_ERROR, + EMIT_MD_GLOBAL_VAR_LINK_ERROR + }; + + /// Callback function type + using EmitMetadataErrorReportFunctionTy = + std::function; + + // Emit the offloading entries and metadata so that the device codegen side + // can easily figure out what to emit. 
The produced metadata looks like + // this: + // + // !omp_offload.info = !{!1, ...} + // + // We only generate metadata for function that contain target regions. + void createOffloadEntriesAndInfoMetadata( + OffloadEntriesInfoManager &OffloadEntriesInfoManager, + bool IsTargetCodegen, bool IsEmbedded, + bool HasRequiresUnifiedSharedMemory, + EmitMetadataErrorReportFunctionTy &ErrorReportFunction); + public: /// Generator for __kmpc_copyprivate /// @@ -1681,6 +1713,19 @@ class OpenMPIRBuilder { BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name = {}); + /// OMP Offload Info Metadata name string + const std::string ompOffloadInfoName = "omp_offload.info"; + + /// Loads all the offload entries information from the host IR + /// metadata. This function is only meant to be used with device code + /// generation. + /// + /// \param M Module to load Metadata info from. Module passed maybe + /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module. + /// \param OffloadEntriesInfoManager Initialize Offload Entry information. 
+ void + loadOffloadInfoMetadata(Module &M, + OffloadEntriesInfoManager &OffloadEntriesInfoManager); }; /// Data structure to contain the information needed to uniquely identify @@ -1690,23 +1735,24 @@ struct TargetRegionEntryInfo { unsigned DeviceID; unsigned FileID; unsigned Line; + unsigned Count; - TargetRegionEntryInfo() : ParentName(""), DeviceID(0), FileID(0), Line(0) {} + TargetRegionEntryInfo() + : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {} TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, - unsigned FileID, unsigned Line) - : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line) { - } + unsigned FileID, unsigned Line, unsigned Count = 0) + : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), + Count(Count) {} static void getTargetRegionEntryFnName(SmallVectorImpl &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, - unsigned Line); - - void getTargetRegionEntryFnName(SmallVectorImpl &Name); + unsigned Line, unsigned Count); bool operator<(const TargetRegionEntryInfo RHS) const { - return std::make_tuple(ParentName, DeviceID, FileID, Line) < - std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line); + return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < + std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, + RHS.Count); } }; @@ -1814,14 +1860,19 @@ class OffloadEntriesInfoManager { void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order); /// Register target region entry. - void registerTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, + void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags, bool IsDevice); /// Return true if a target region entry with the provided information /// exists. 
- bool hasTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, + bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId = false) const; + + // Return the Name based on \a EntryInfo using the next available Count. + void getTargetRegionEntryFnName(SmallVectorImpl &Name, + const TargetRegionEntryInfo &EntryInfo); + /// brief Applies action \a Action on all registered entries. typedef function_ref @@ -1894,6 +1945,23 @@ class OffloadEntriesInfoManager { const OffloadDeviceGlobalVarEntryInfoActTy &Action); private: + /// Return the count of entries at a particular source location. + unsigned + getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; + + /// Update the count of entries at a particular source location. + void + incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); + + static TargetRegionEntryInfo + getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { + return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, + EntryInfo.FileID, EntryInfo.Line, 0); + } + + // Count of entries at a location. + std::map OffloadEntriesTargetRegionCount; + // Storage for target region entries kind. typedef std::map OffloadEntriesTargetRegionTy; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 71abc8822730a..03964af5893f0 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -487,6 +487,7 @@ __OMP_RTL(__last, false, Void, ) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) #define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) #define AllocSizeAttr(N, M) Attribute::getWithAllocSizeArgs(Ctx, N, M) +#define MemoryAttr(ME) Attribute::getWithMemoryEffects(Ctx, ME) #define AttributeSet(...) 
\ AttributeSet::get(Ctx, ArrayRef({__VA_ARGS__})) @@ -496,27 +497,29 @@ __OMP_RTL(__last, false, Void, ) #define __OMP_ATTRS_SET(VarName, AttrSet) OMP_ATTRS_SET(VarName, AttrSet) -__OMP_ATTRS_SET(GetterAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly), - EnumAttr(NoSync), EnumAttr(NoFree), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn)) - : AttributeSet(EnumAttr(NoUnwind))) -__OMP_ATTRS_SET(GetterArgWriteAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(NoFree), - EnumAttr(InaccessibleMemOrArgMemOnly), - EnumAttr(WillReturn)) - : AttributeSet(EnumAttr(NoUnwind))) -__OMP_ATTRS_SET(SetterAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly), - EnumAttr(NoSync), EnumAttr(NoFree), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn)) - : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + GetterAttrs, + OptimisticAttributes + ? AttributeSet( + EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::Ref))) + : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + GetterArgWriteAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly())) + : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + SetterAttrs, + OptimisticAttributes + ? AttributeSet( + EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::Mod))) + : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(DefaultAttrs, OptimisticAttributes @@ -529,12 +532,13 @@ __OMP_ATTRS_SET(BarrierAttrs, ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)) : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))) -__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, - OptimisticAttributes - ? 
AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(InaccessibleMemOrArgMemOnly), - EnumAttr(WillReturn), EnumAttr(NoFree)) - : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + InaccessibleArgOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly())) + : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(AlwaysInlineAttrs, OptimisticAttributes @@ -542,12 +546,13 @@ __OMP_ATTRS_SET(AlwaysInlineAttrs, : AttributeSet(EnumAttr(AlwaysInline))) #if 0 -__OMP_ATTRS_SET(InaccessibleOnlyAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn), EnumAttr(NoFree)) - : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET( + InaccessibleOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(WillReturn), + MemoryAttr(MemoryEffects::inaccessibleMemOnly())) + : AttributeSet(EnumAttr(NoUnwind))) #endif __OMP_ATTRS_SET(AllocAttrs, diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 595c9197de1dd..75fe534ac61ea 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -64,9 +64,6 @@ def AllocSize : IntAttr<"allocsize", [FnAttr]>; /// inline=always. def AlwaysInline : EnumAttr<"alwaysinline", [FnAttr]>; -/// Function can access memory only using pointers based on its arguments. -def ArgMemOnly : EnumAttr<"argmemonly", [FnAttr]>; - /// Callee is recognized as a builtin, despite nobuiltin attribute on its /// declaration. def Builtin : EnumAttr<"builtin", [FnAttr]>; @@ -106,14 +103,6 @@ def ElementType : TypeAttr<"elementtype", [ParamAttr]>; /// symbol. def FnRetThunkExtern : EnumAttr<"fn_ret_thunk_extern", [FnAttr]>; -/// Function may only access memory that is inaccessible from IR. 
-def InaccessibleMemOnly : EnumAttr<"inaccessiblememonly", [FnAttr]>; - -/// Function may only access memory that is either inaccessible from the IR, -/// or pointed to by its pointer arguments. -def InaccessibleMemOrArgMemOnly : EnumAttr<"inaccessiblemem_or_argmemonly", - [FnAttr]>; - /// Pass structure in an alloca. def InAlloca : TypeAttr<"inalloca", [ParamAttr]>; @@ -218,10 +207,10 @@ def OptimizeNone : EnumAttr<"optnone", [FnAttr]>; def Preallocated : TypeAttr<"preallocated", [FnAttr, ParamAttr]>; /// Function does not access memory. -def ReadNone : EnumAttr<"readnone", [FnAttr, ParamAttr]>; +def ReadNone : EnumAttr<"readnone", [ParamAttr]>; /// Function only reads from memory. -def ReadOnly : EnumAttr<"readonly", [FnAttr, ParamAttr]>; +def ReadOnly : EnumAttr<"readonly", [ParamAttr]>; /// Return value is always equal to this argument. def Returned : EnumAttr<"returned", [ParamAttr]>; @@ -306,7 +295,7 @@ def VScaleRange : IntAttr<"vscale_range", [FnAttr]>; def WillReturn : EnumAttr<"willreturn", [FnAttr]>; /// Function only writes to memory. -def WriteOnly : EnumAttr<"writeonly", [FnAttr, ParamAttr]>; +def WriteOnly : EnumAttr<"writeonly", [ParamAttr]>; /// Zero extended before/after call. def ZExt : EnumAttr<"zeroext", [ParamAttr, RetAttr]>; diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index dfb65ce341296..61fa4d8f3b9fd 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -48,7 +48,7 @@ namespace llvm { Function *LabelFn; ///< llvm.dbg.label Function *AddrFn; ///< llvm.dbg.addr - SmallVector AllEnumTypes; + SmallVector AllEnumTypes; /// Track the RetainTypes, since they can be updated later on. 
SmallVector AllRetainTypes; SmallVector AllSubprograms; diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index 730c69d0c622e..b35d447a7c891 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -159,6 +159,8 @@ class DebugInfoFinder { SmallPtrSet NodesSeen; }; +/// Return true if assignment tracking is enabled. +bool getEnableAssignmentTracking(); } // end namespace llvm #endif // LLVM_IR_DEBUGINFO_H diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 5b20bf3ade99a..f57691f6f9fc6 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -215,6 +215,7 @@ class DINode : public MDNode { case DIImportedEntityKind: case DIModuleKind: case DIGenericSubrangeKind: + case DIAssignIDKind: return true; } } @@ -295,6 +296,41 @@ class GenericDINode : public DINode { } }; +/// Assignment ID. +/// Used to link stores (as an attachment) and dbg.assigns (as an operand). +/// DIAssignID metadata is never uniqued as we compare instances using +/// referential equality (the instance/address is the ID). +class DIAssignID : public MDNode { + friend class LLVMContextImpl; + friend class MDNode; + + DIAssignID(LLVMContext &C, StorageType Storage) + : MDNode(C, DIAssignIDKind, Storage, None) {} + + ~DIAssignID() { dropAllReferences(); } + + static DIAssignID *getImpl(LLVMContext &Context, StorageType Storage, + bool ShouldCreate = true); + + TempDIAssignID cloneImpl() const { return getTemporary(getContext()); } + +public: + // This node has no operands to replace. + void replaceOperandWith(unsigned I, Metadata *New) = delete; + + static DIAssignID *getDistinct(LLVMContext &Context) { + return getImpl(Context, Distinct); + } + static TempDIAssignID getTemporary(LLVMContext &Context) { + return TempDIAssignID(getImpl(Context, Temporary)); + } + // NOTE: Do not define get(LLVMContext&) - see class comment. 
+ + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIAssignIDKind; + } +}; + /// Array subrange. /// /// TODO: Merge into node for DW_TAG_array_type, which should have a custom diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 3d986325c5d33..8723bf2a0680c 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -49,3 +49,4 @@ LLVM_FIXED_MD_KIND(MD_memprof, "memprof", 34) LLVM_FIXED_MD_KIND(MD_callsite, "callsite", 35) LLVM_FIXED_MD_KIND(MD_kcfi_type, "kcfi_type", 36) LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 37) +LLVM_FIXED_MD_KIND(MD_DIAssignID, "DIAssignID", 38) diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 7945c64c86103..22e98e91d4580 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -491,54 +491,35 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, void setPresplitCoroutine() { addFnAttr(Attribute::PresplitCoroutine); } void setSplittedCoroutine() { removeFnAttr(Attribute::PresplitCoroutine); } + MemoryEffects getMemoryEffects() const; + void setMemoryEffects(MemoryEffects ME); + /// Determine if the function does not access memory. - bool doesNotAccessMemory() const { - return hasFnAttribute(Attribute::ReadNone); - } - void setDoesNotAccessMemory() { - addFnAttr(Attribute::ReadNone); - } + bool doesNotAccessMemory() const; + void setDoesNotAccessMemory(); /// Determine if the function does not access or only reads memory. - bool onlyReadsMemory() const { - return doesNotAccessMemory() || hasFnAttribute(Attribute::ReadOnly); - } - void setOnlyReadsMemory() { - addFnAttr(Attribute::ReadOnly); - } + bool onlyReadsMemory() const; + void setOnlyReadsMemory(); /// Determine if the function does not access or only writes memory. 
- bool onlyWritesMemory() const { - return doesNotAccessMemory() || hasFnAttribute(Attribute::WriteOnly); - } - void setOnlyWritesMemory() { - addFnAttr(Attribute::WriteOnly); - } + bool onlyWritesMemory() const; + void setOnlyWritesMemory(); /// Determine if the call can access memmory only using pointers based /// on its arguments. - bool onlyAccessesArgMemory() const { - return hasFnAttribute(Attribute::ArgMemOnly); - } - void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } + bool onlyAccessesArgMemory() const; + void setOnlyAccessesArgMemory(); /// Determine if the function may only access memory that is /// inaccessible from the IR. - bool onlyAccessesInaccessibleMemory() const { - return hasFnAttribute(Attribute::InaccessibleMemOnly); - } - void setOnlyAccessesInaccessibleMemory() { - addFnAttr(Attribute::InaccessibleMemOnly); - } + bool onlyAccessesInaccessibleMemory() const; + void setOnlyAccessesInaccessibleMemory(); /// Determine if the function may only access memory that is /// either inaccessible from the IR or pointed to by its arguments. - bool onlyAccessesInaccessibleMemOrArgMem() const { - return hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly); - } - void setOnlyAccessesInaccessibleMemOrArgMem() { - addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } + bool onlyAccessesInaccessibleMemOrArgMem() const; + void setOnlyAccessesInaccessibleMemOrArgMem(); /// Determine if the function cannot return. 
bool doesNotReturn() const { diff --git a/llvm/include/llvm/IR/GlobalAlias.h b/llvm/include/llvm/IR/GlobalAlias.h index 01134448a8fa7..de405da5ca231 100644 --- a/llvm/include/llvm/IR/GlobalAlias.h +++ b/llvm/include/llvm/IR/GlobalAlias.h @@ -93,8 +93,8 @@ class GlobalAlias : public GlobalValue, public ilist_node { } static bool isValidLinkage(LinkageTypes L) { - return isExternalLinkage(L) || isLocalLinkage(L) || - isWeakLinkage(L) || isLinkOnceLinkage(L); + return isExternalLinkage(L) || isLocalLinkage(L) || isWeakLinkage(L) || + isLinkOnceLinkage(L) || isAvailableExternallyLinkage(L); } // Methods for support type inquiry through isa, cast, and dyn_cast: diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 60390b18632ea..da081d0c40711 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1847,47 +1847,37 @@ class CallBase : public Instruction { /// Return true if the call should not be inlined. bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } void setIsNoInline() { addFnAttr(Attribute::NoInline); } + + MemoryEffects getMemoryEffects() const; + void setMemoryEffects(MemoryEffects ME); + /// Determine if the call does not access memory. - bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); } - void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); } + bool doesNotAccessMemory() const; + void setDoesNotAccessMemory(); /// Determine if the call does not access or only reads memory. - bool onlyReadsMemory() const { - return hasImpliedFnAttr(Attribute::ReadOnly); - } - - void setOnlyReadsMemory() { addFnAttr(Attribute::ReadOnly); } + bool onlyReadsMemory() const; + void setOnlyReadsMemory(); /// Determine if the call does not access or only writes memory. 
- bool onlyWritesMemory() const { - return hasImpliedFnAttr(Attribute::WriteOnly); - } - void setOnlyWritesMemory() { addFnAttr(Attribute::WriteOnly); } + bool onlyWritesMemory() const; + void setOnlyWritesMemory(); /// Determine if the call can access memmory only using pointers based /// on its arguments. - bool onlyAccessesArgMemory() const { - return hasFnAttr(Attribute::ArgMemOnly); - } - void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } + bool onlyAccessesArgMemory() const; + void setOnlyAccessesArgMemory(); /// Determine if the function may only access memory that is /// inaccessible from the IR. - bool onlyAccessesInaccessibleMemory() const { - return hasFnAttr(Attribute::InaccessibleMemOnly); - } - void setOnlyAccessesInaccessibleMemory() { - addFnAttr(Attribute::InaccessibleMemOnly); - } + bool onlyAccessesInaccessibleMemory() const; + void setOnlyAccessesInaccessibleMemory(); /// Determine if the function may only access memory that is /// either inaccessible from the IR or pointed to by its arguments. - bool onlyAccessesInaccessibleMemOrArgMem() const { - return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } - void setOnlyAccessesInaccessibleMemOrArgMem() { - addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } + bool onlyAccessesInaccessibleMemOrArgMem() const; + void setOnlyAccessesInaccessibleMemOrArgMem(); + /// Determine if the call cannot return. bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } void setDoesNotReturn() { addFnAttr(Attribute::NoReturn); } @@ -2107,43 +2097,6 @@ class CallBase : public Instruction { return false; } - /// Is the function attribute S disallowed by some operand bundle on - /// this operand bundle user? - bool isFnAttrDisallowedByOpBundle(StringRef S) const { - // Operand bundles only possibly disallow memory access attributes. All - // String attributes are fine. 
- return false; - } - - /// Is the function attribute A disallowed by some operand bundle on - /// this operand bundle user? - bool isFnAttrDisallowedByOpBundle(Attribute::AttrKind A) const { - switch (A) { - default: - return false; - - case Attribute::InaccessibleMemOrArgMemOnly: - return hasReadingOperandBundles(); - - case Attribute::InaccessibleMemOnly: - return hasReadingOperandBundles(); - - case Attribute::ArgMemOnly: - return hasReadingOperandBundles(); - - case Attribute::ReadNone: - return hasReadingOperandBundles(); - - case Attribute::ReadOnly: - return hasClobberingOperandBundles(); - - case Attribute::WriteOnly: - return hasReadingOperandBundles(); - } - - llvm_unreachable("switch has a default case!"); - } - /// Used to keep track of an operand bundle. See the main comment on /// OperandBundleUser above. struct BundleOpInfo { @@ -2303,35 +2256,10 @@ class CallBase : public Instruction { if (Attrs.hasFnAttr(Kind)) return true; - // Operand bundles override attributes on the called function, but don't - // override attributes directly present on the call instruction. - if (isFnAttrDisallowedByOpBundle(Kind)) - return false; - return hasFnAttrOnCalledFunction(Kind); } template Attribute getFnAttrOnCalledFunction(AK Kind) const; - /// A specialized version of hasFnAttrImpl for when the caller wants to - /// know if an attribute's semantics are implied, not whether the attribute - /// is actually present. This distinction only exists when checking whether - /// something is readonly or writeonly since readnone implies both. The case - /// which motivates the specialized code is a callee with readnone, and an - /// operand bundle on the call which disallows readnone but not either - /// readonly or writeonly. 
- bool hasImpliedFnAttr(Attribute::AttrKind Kind) const { - assert((Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly) && - "use hasFnAttrImpl instead"); - if (Attrs.hasFnAttr(Kind) || Attrs.hasFnAttr(Attribute::ReadNone)) - return true; - - if (isFnAttrDisallowedByOpBundle(Kind)) - return false; - - return hasFnAttrOnCalledFunction(Kind) || - hasFnAttrOnCalledFunction(Attribute::ReadNone); - } - /// Determine whether the return value has the given attribute. Supports /// Attribute::AttrKind and StringRef as \p AttrKind types. template bool hasRetAttrImpl(AttrKind Kind) const { diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 4ff48c3669d50..f78e45c0e32ee 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -91,6 +91,7 @@ class IntrinsicInst : public CallInst { case Intrinsic::assume: case Intrinsic::sideeffect: case Intrinsic::pseudoprobe: + case Intrinsic::dbg_assign: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: @@ -129,6 +130,7 @@ static inline bool isDbgInfoIntrinsic(Intrinsic::ID ID) { case Intrinsic::dbg_value: case Intrinsic::dbg_addr: case Intrinsic::dbg_label: + case Intrinsic::dbg_assign: return true; default: return false; @@ -231,10 +233,12 @@ class DbgVariableIntrinsic : public DbgInfoIntrinsic { bool hasArgList() const { return isa(getRawLocation()); } - /// Does this describe the address of a local variable. True for dbg.addr - /// and dbg.declare, but not dbg.value, which describes its value. + /// Does this describe the address of a local variable. True for dbg.addr and + /// dbg.declare, but not dbg.value, which describes its value, or dbg.assign, + /// which describes a combination of the variable's value and address. 
bool isAddressOfVariable() const { - return getIntrinsicID() != Intrinsic::dbg_value; + return getIntrinsicID() != Intrinsic::dbg_value && + getIntrinsicID() != Intrinsic::dbg_assign; } void setUndef() { @@ -286,6 +290,11 @@ class DbgVariableIntrinsic : public DbgInfoIntrinsic { /// is described. Optional getFragmentSizeInBits() const; + /// Get the FragmentInfo for the variable. + Optional getFragment() const { + return getExpression()->getFragmentInfo(); + } + /// \name Casting methods /// @{ static bool classof(const IntrinsicInst *I) { @@ -293,6 +302,7 @@ class DbgVariableIntrinsic : public DbgInfoIntrinsic { case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_addr: + case Intrinsic::dbg_assign: return true; default: return false; @@ -302,7 +312,7 @@ class DbgVariableIntrinsic : public DbgInfoIntrinsic { return isa(V) && classof(cast(V)); } /// @} -private: +protected: void setArgOperand(unsigned i, Value *v) { DbgInfoIntrinsic::setArgOperand(i, v); } @@ -363,7 +373,52 @@ class DbgValueInst : public DbgVariableIntrinsic { /// \name Casting methods /// @{ static bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::dbg_value; + return I->getIntrinsicID() == Intrinsic::dbg_value || + I->getIntrinsicID() == Intrinsic::dbg_assign; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + /// @} +}; + +/// This represents the llvm.dbg.assign instruction. 
+class DbgAssignIntrinsic : public DbgValueInst { + enum Operands { + OpValue, + OpVar, + OpExpr, + OpAssignID, + OpAddress, + OpAddressExpr, + }; + +public: + Value *getAddress() const; + Metadata *getRawAddress() const { + return cast(getArgOperand(OpAddress))->getMetadata(); + } + Metadata *getRawAssignID() const { + return cast(getArgOperand(OpAssignID))->getMetadata(); + } + DIAssignID *getAssignID() const { return cast(getRawAssignID()); } + Metadata *getRawAddressExpression() const { + return cast(getArgOperand(OpAddressExpr))->getMetadata(); + } + DIExpression *getAddressExpression() const { + return cast(getRawAddressExpression()); + } + void setAddressExpression(DIExpression *NewExpr) { + setArgOperand(OpAddressExpr, + MetadataAsValue::get(NewExpr->getContext(), NewExpr)); + } + void setAssignId(DIAssignID *New); + void setAddress(Value *V); + void setValue(Value *V); + /// \name Casting methods + /// @{ + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::dbg_assign; } static bool classof(const Value *V) { return isa(V) && classof(cast(V)); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 64d186c508ce8..3050bd2acec73 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -281,6 +281,7 @@ def llvm_v128i1_ty : LLVMType; // 128 x i1 def llvm_v256i1_ty : LLVMType; // 256 x i1 def llvm_v512i1_ty : LLVMType; // 512 x i1 def llvm_v1024i1_ty : LLVMType; //1024 x i1 +def llvm_v2048i1_ty : LLVMType; //2048 x i1 def llvm_v1i8_ty : LLVMType; // 1 x i8 def llvm_v2i8_ty : LLVMType; // 2 x i8 @@ -997,6 +998,13 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { [llvm_metadata_ty, llvm_metadata_ty, llvm_metadata_ty]>; + def int_dbg_assign : DefaultAttrsIntrinsic<[], + [llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty, + llvm_metadata_ty]>; def int_dbg_label : DefaultAttrsIntrinsic<[], 
[llvm_metadata_ty]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 3b28f958020ce..b8750abca2050 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -19,64 +19,69 @@ let TargetPrefix = "wasm" in { // All intrinsics start with "llvm.wasm.". // Query the current memory size, and increase the current memory size. // Note that memory.size is not IntrNoMem because it must be sequenced with // respect to memory.grow calls. -def int_wasm_memory_size : Intrinsic<[llvm_anyint_ty], - [llvm_i32_ty], - [IntrReadMem]>; -def int_wasm_memory_grow : Intrinsic<[llvm_anyint_ty], - [llvm_i32_ty, LLVMMatchType<0>], - []>; +def int_wasm_memory_size : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrReadMem]>; +def int_wasm_memory_grow : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_i32_ty, LLVMMatchType<0>], []>; //===----------------------------------------------------------------------===// // ref.null intrinsics //===----------------------------------------------------------------------===// -def int_wasm_ref_null_extern : Intrinsic<[llvm_externref_ty], [], [IntrNoMem]>; -def int_wasm_ref_null_func : Intrinsic<[llvm_funcref_ty], [], [IntrNoMem]>; -def int_wasm_ref_is_null_extern : Intrinsic<[llvm_i32_ty], [llvm_externref_ty], - [IntrNoMem], "llvm.wasm.ref.is_null.extern">; -def int_wasm_ref_is_null_func : Intrinsic<[llvm_i32_ty], [llvm_funcref_ty], - [IntrNoMem], "llvm.wasm.ref.is_null.func">; +def int_wasm_ref_null_extern : + DefaultAttrsIntrinsic<[llvm_externref_ty], [], [IntrNoMem]>; +def int_wasm_ref_null_func : + DefaultAttrsIntrinsic<[llvm_funcref_ty], [], [IntrNoMem]>; +def int_wasm_ref_is_null_extern : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_externref_ty], [IntrNoMem], + "llvm.wasm.ref.is_null.extern">; +def int_wasm_ref_is_null_func : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_funcref_ty], + [IntrNoMem], 
"llvm.wasm.ref.is_null.func">; //===----------------------------------------------------------------------===// // Table intrinsics //===----------------------------------------------------------------------===// -def int_wasm_table_set_externref : Intrinsic<[], - [llvm_table_ty, llvm_i32_ty, llvm_externref_ty], - [IntrWriteMem]>; -def int_wasm_table_set_funcref : Intrinsic<[], - [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty], - [IntrWriteMem]>; - -def int_wasm_table_get_externref : Intrinsic<[llvm_externref_ty], - [llvm_table_ty, llvm_i32_ty], - [IntrReadMem]>; -def int_wasm_table_get_funcref : Intrinsic<[llvm_funcref_ty], - [llvm_table_ty, llvm_i32_ty], - [IntrReadMem]>; +def int_wasm_table_set_externref : + DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_externref_ty], + [IntrWriteMem]>; +def int_wasm_table_set_funcref : + DefaultAttrsIntrinsic<[], [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty], + [IntrWriteMem]>; + +def int_wasm_table_get_externref : + DefaultAttrsIntrinsic<[llvm_externref_ty], [llvm_table_ty, llvm_i32_ty], + [IntrReadMem]>; +def int_wasm_table_get_funcref : + DefaultAttrsIntrinsic<[llvm_funcref_ty], [llvm_table_ty, llvm_i32_ty], + [IntrReadMem]>; // Query the current table size, and increase the current table size. 
-def int_wasm_table_size : Intrinsic<[llvm_i32_ty], - [llvm_table_ty], - [IntrReadMem]>; -def int_wasm_table_copy : Intrinsic<[], - [llvm_table_ty, llvm_table_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - []>; -def int_wasm_table_grow_externref : Intrinsic<[llvm_i32_ty], - [llvm_table_ty, llvm_externref_ty, llvm_i32_ty], - []>; -def int_wasm_table_grow_funcref : Intrinsic<[llvm_i32_ty], - [llvm_table_ty, llvm_funcref_ty, llvm_i32_ty], - []>; -def int_wasm_table_fill_externref : Intrinsic<[], - [llvm_table_ty, llvm_i32_ty, llvm_externref_ty, llvm_i32_ty], - []>; -def int_wasm_table_fill_funcref : Intrinsic<[], - [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty, llvm_i32_ty], - []>; +def int_wasm_table_size : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_table_ty], [IntrReadMem]>; +def int_wasm_table_copy : + DefaultAttrsIntrinsic<[], + [llvm_table_ty, llvm_table_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], []>; +def int_wasm_table_grow_externref : + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_table_ty, llvm_externref_ty, llvm_i32_ty], []>; +def int_wasm_table_grow_funcref : + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_table_ty, llvm_funcref_ty, llvm_i32_ty], []>; +def int_wasm_table_fill_externref : + DefaultAttrsIntrinsic<[], + [llvm_table_ty, llvm_i32_ty, llvm_externref_ty, + llvm_i32_ty], []>; +def int_wasm_table_fill_funcref : + DefaultAttrsIntrinsic<[], + [llvm_table_ty, llvm_i32_ty, llvm_funcref_ty, + llvm_i32_ty], []>; //===----------------------------------------------------------------------===// // Trapping float-to-int conversions //===----------------------------------------------------------------------===// +// These don't use default attributes, because they are not willreturn. 
def int_wasm_trunc_signed : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; @@ -88,12 +93,12 @@ def int_wasm_trunc_unsigned : Intrinsic<[llvm_anyint_ty], // Saturating float-to-int conversions //===----------------------------------------------------------------------===// -def int_wasm_trunc_saturate_signed : Intrinsic<[llvm_anyint_ty], - [llvm_anyfloat_ty], - [IntrNoMem, IntrSpeculatable]>; -def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty], - [llvm_anyfloat_ty], - [IntrNoMem, IntrSpeculatable]>; +def int_wasm_trunc_saturate_signed : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_trunc_saturate_unsigned : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// // Exception handling intrinsics @@ -108,32 +113,35 @@ def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>; // Since wasm does not use landingpad instructions, these instructions return // exception pointer and selector values until we lower them in WasmEHPrepare. -def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], - [IntrHasSideEffects]>; -def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty], - [IntrHasSideEffects]>; +def int_wasm_get_exception : + DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_token_ty], [IntrHasSideEffects]>; +def int_wasm_get_ehselector : + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrHasSideEffects]>; // wasm.catch returns the pointer to the exception object caught by wasm 'catch' // instruction. This returns a single pointer, which is the case for C++ // exceptions. The immediate argument is an index to for a tag, which is 0 for // C++ exceptions. 
-def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], - [IntrHasSideEffects, ImmArg>]>; +def int_wasm_catch : + DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_i32_ty], + [IntrHasSideEffects, ImmArg>]>; // WebAssembly EH must maintain the landingpads in the order assigned to them // by WasmEHPrepare pass to generate landingpad table in EHStreamer. This is // used in order to give them the indices in WasmEHPrepare. -def int_wasm_landingpad_index: Intrinsic<[], [llvm_token_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; +def int_wasm_landingpad_index : + DefaultAttrsIntrinsic<[], [llvm_token_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; // Returns LSDA address of the current function. -def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; +def int_wasm_lsda : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; //===----------------------------------------------------------------------===// // Atomic intrinsics //===----------------------------------------------------------------------===// // wait / notify +// These don't use default attributes, because they are not nosync. 
def int_wasm_memory_atomic_wait32 : Intrinsic<[llvm_i32_ty], [LLVMPointerType, llvm_i32_ty, llvm_i64_ty], @@ -157,152 +165,153 @@ def int_wasm_memory_atomic_notify: //===----------------------------------------------------------------------===// def int_wasm_swizzle : - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_shuffle : - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_sub_sat_signed : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_sub_sat_unsigned : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_avgr_unsigned : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_bitselect : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, 
LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_anytrue : - Intrinsic<[llvm_i32_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_alltrue : - Intrinsic<[llvm_i32_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_bitmask : - Intrinsic<[llvm_i32_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], + [llvm_anyvector_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_dot : - Intrinsic<[llvm_v4i32_ty], - [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_narrow_signed : - Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, LLVMMatchType<1>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_narrow_unsigned : - Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty, LLVMMatchType<1>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_q15mulr_sat_signed : - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_pmin : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + 
[IntrNoMem, IntrSpeculatable]>; def int_wasm_pmax : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_extadd_pairwise_signed : - Intrinsic<[llvm_anyvector_ty], - [LLVMSubdivide2VectorType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_extadd_pairwise_unsigned : - Intrinsic<[llvm_anyvector_ty], - [LLVMSubdivide2VectorType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// // Relaxed SIMD intrinsics (experimental) //===----------------------------------------------------------------------===// def int_wasm_fma : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_fms : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_laneselect : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_swizzle : - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem, IntrSpeculatable]>; + 
DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_min : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_max : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_trunc_signed: - Intrinsic<[llvm_v4i32_ty], - [llvm_v4f32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_trunc_unsigned: - Intrinsic<[llvm_v4i32_ty], - [llvm_v4f32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_trunc_signed_zero: - Intrinsic<[llvm_v4i32_ty], - [llvm_v2f64_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_trunc_unsigned_zero: - Intrinsic<[llvm_v4i32_ty], - [llvm_v2f64_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_q15mulr_signed: - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_dot_i8x16_i7x16_signed: - Intrinsic<[llvm_v8i16_ty], - [llvm_v16i8_ty, llvm_v16i8_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_dot_i8x16_i7x16_add_signed: - 
Intrinsic<[llvm_v4i32_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_relaxed_dot_bf16x8_add_f32: - Intrinsic<[llvm_v4f32_ty], - [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4f32_ty], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_v4f32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// @@ -310,18 +319,18 @@ def int_wasm_relaxed_dot_bf16x8_add_f32: //===----------------------------------------------------------------------===// def int_wasm_tls_size : - Intrinsic<[llvm_anyint_ty], - [], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_tls_align : - Intrinsic<[llvm_anyint_ty], - [], - [IntrNoMem, IntrSpeculatable]>; + DefaultAttrsIntrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; def int_wasm_tls_base : - Intrinsic<[llvm_ptr_ty], - [], - [IntrReadMem]>; + DefaultAttrsIntrinsic<[llvm_ptr_ty], + [], + [IntrReadMem]>; } // TargetPrefix = "wasm" diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index f6cc72928fd26..72fbf8e9a4c8f 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -4370,638 +4370,646 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { def int_x86_avx512_conflict_d_128 : ClangBuiltin<"__builtin_ia32_vpconflictsi_128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; def int_x86_avx512_conflict_d_256 : ClangBuiltin<"__builtin_ia32_vpconflictsi_256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>; def 
int_x86_avx512_conflict_d_512 : ClangBuiltin<"__builtin_ia32_vpconflictsi_512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty], + [IntrNoMem]>; def int_x86_avx512_conflict_q_128 : ClangBuiltin<"__builtin_ia32_vpconflictdi_128">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_conflict_q_256 : ClangBuiltin<"__builtin_ia32_vpconflictdi_256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty], [IntrNoMem]>; def int_x86_avx512_conflict_q_512 : ClangBuiltin<"__builtin_ia32_vpconflictdi_512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty], [IntrNoMem]>; } // Compares let TargetPrefix = "x86" in { // 512-bit def int_x86_avx512_vcomi_sd : ClangBuiltin<"__builtin_ia32_vcomisd">, - Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, - llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; def int_x86_avx512_vcomi_ss : ClangBuiltin<"__builtin_ia32_vcomiss">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, - llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; } // Compress, Expand let TargetPrefix = "x86" in { def int_x86_avx512_mask_compress : - Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [IntrNoMem]>; def int_x86_avx512_mask_expand : - 
Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [IntrNoMem]>; } // truncate let TargetPrefix = "x86" in { def int_x86_avx512_mask_pmov_qb_128 : - ClangBuiltin<"__builtin_ia32_pmovqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_128 : - ClangBuiltin<"__builtin_ia32_pmovsqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_128 : - ClangBuiltin<"__builtin_ia32_pmovusqb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def 
int_x86_avx512_mask_pmovus_qb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qb_256 : - ClangBuiltin<"__builtin_ia32_pmovqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_256 : - ClangBuiltin<"__builtin_ia32_pmovsqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_256 : - ClangBuiltin<"__builtin_ia32_pmovusqb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def 
int_x86_avx512_mask_pmovus_qb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qb_512 : - ClangBuiltin<"__builtin_ia32_pmovqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qb_512 : - ClangBuiltin<"__builtin_ia32_pmovsqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qb_512 : - ClangBuiltin<"__builtin_ia32_pmovusqb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def 
int_x86_avx512_mask_pmovus_qb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_128 : - ClangBuiltin<"__builtin_ia32_pmovqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_128 : - ClangBuiltin<"__builtin_ia32_pmovsqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_128 : - ClangBuiltin<"__builtin_ia32_pmovusqw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def 
int_x86_avx512_mask_pmovus_qw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_256 : - ClangBuiltin<"__builtin_ia32_pmovqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_256 : - ClangBuiltin<"__builtin_ia32_pmovsqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_256 : - ClangBuiltin<"__builtin_ia32_pmovusqw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def 
int_x86_avx512_mask_pmovus_qw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qw_512 : - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qw_512 : - ClangBuiltin<"__builtin_ia32_pmovsqw512_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqw512_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qw_512 : - ClangBuiltin<"__builtin_ia32_pmovusqw512_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqw512_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, 
llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_128 : - ClangBuiltin<"__builtin_ia32_pmovqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovqd128_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_qd_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqd128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_128 : - ClangBuiltin<"__builtin_ia32_pmovsqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqd128_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_128 : - ClangBuiltin<"__builtin_ia32_pmovusqd128_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqd128_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], - 
[IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovqd256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_256 : - ClangBuiltin<"__builtin_ia32_pmovsqd256_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqd256_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_256 : - ClangBuiltin<"__builtin_ia32_pmovusqd256_mask">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqd256_mask">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_qd_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + 
ClangBuiltin<"__builtin_ia32_pmovqd512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_qd_512 : - ClangBuiltin<"__builtin_ia32_pmovsqd512_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsqd512_mask">, + DefaultAttrsIntrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_qd_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_qd_512 : - ClangBuiltin<"__builtin_ia32_pmovusqd512_mask">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusqd512_mask">, + DefaultAttrsIntrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_qd_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_128 : - ClangBuiltin<"__builtin_ia32_pmovdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovdb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + 
ClangBuiltin<"__builtin_ia32_pmovdb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_128 : - ClangBuiltin<"__builtin_ia32_pmovsdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_128 : - ClangBuiltin<"__builtin_ia32_pmovusdb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_256 : - ClangBuiltin<"__builtin_ia32_pmovdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovdb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + 
ClangBuiltin<"__builtin_ia32_pmovdb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_256 : - ClangBuiltin<"__builtin_ia32_pmovsdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_256 : - ClangBuiltin<"__builtin_ia32_pmovusdb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_db_512 : - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_db_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], 
+ [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_db_512 : - ClangBuiltin<"__builtin_ia32_pmovsdb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_db_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_db_512 : - ClangBuiltin<"__builtin_ia32_pmovusdb512_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdb512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_db_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_128 : - ClangBuiltin<"__builtin_ia32_pmovdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovdw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + 
[IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_128 : - ClangBuiltin<"__builtin_ia32_pmovsdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_128 : - ClangBuiltin<"__builtin_ia32_pmovusdw128_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdw128_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_256 : - ClangBuiltin<"__builtin_ia32_pmovdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovdw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def 
int_x86_avx512_mask_pmovs_dw_256 : - ClangBuiltin<"__builtin_ia32_pmovsdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_256 : - ClangBuiltin<"__builtin_ia32_pmovusdw256_mask">, - Intrinsic<[llvm_v8i16_ty], - [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdw256_mask">, + DefaultAttrsIntrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_dw_512 : - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_dw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovdw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_dw_512 : - ClangBuiltin<"__builtin_ia32_pmovsdw512_mask">, - Intrinsic<[llvm_v16i16_ty], - 
[llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovsdw512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_dw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_dw_512 : - ClangBuiltin<"__builtin_ia32_pmovusdw512_mask">, - Intrinsic<[llvm_v16i16_ty], - [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovusdw512_mask">, + DefaultAttrsIntrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_dw_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_128 : - ClangBuiltin<"__builtin_ia32_pmovwb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovwb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmov_wb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovwb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovwb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_128 : - ClangBuiltin<"__builtin_ia32_pmovswb128_mask">, - Intrinsic<[llvm_v16i8_ty], - 
[llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovswb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovswb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovswb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_128 : - ClangBuiltin<"__builtin_ia32_pmovuswb128_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovuswb128_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_128 : - ClangBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovwb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovwb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_256 : - ClangBuiltin<"__builtin_ia32_pmovswb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovswb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovswb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, 
llvm_v16i16_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovswb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_256 : - ClangBuiltin<"__builtin_ia32_pmovuswb256_mask">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovuswb256_mask">, + DefaultAttrsIntrinsic<[llvm_v16i8_ty], + [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_256 : - ClangBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmov_wb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovwb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovwb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovs_wb_512 : - ClangBuiltin<"__builtin_ia32_pmovswb512_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovswb512_mask">, + DefaultAttrsIntrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovs_wb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovswb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovswb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrArgMemOnly]>; def int_x86_avx512_mask_pmovus_wb_512 : - ClangBuiltin<"__builtin_ia32_pmovuswb512_mask">, - Intrinsic<[llvm_v32i8_ty], - [llvm_v32i16_ty, 
llvm_v32i8_ty, llvm_i32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_pmovuswb512_mask">, + DefaultAttrsIntrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_x86_avx512_mask_pmovus_wb_mem_512 : - ClangBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, - Intrinsic<[], - [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], - [IntrArgMemOnly]>; + ClangBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, + DefaultAttrsIntrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrArgMemOnly]>; } // Bitwise ternary logic let TargetPrefix = "x86" in { def int_x86_avx512_pternlog_d_128 : - ClangBuiltin<"__builtin_ia32_pternlogd128">, - Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogd128">, + DefaultAttrsIntrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_d_256 : - ClangBuiltin<"__builtin_ia32_pternlogd256">, - Intrinsic<[llvm_v8i32_ty], - [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogd256">, + DefaultAttrsIntrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_d_512 : - ClangBuiltin<"__builtin_ia32_pternlogd512">, - Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogd512">, + DefaultAttrsIntrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, + llvm_i32_ty], [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_128 : - ClangBuiltin<"__builtin_ia32_pternlogq128">, - Intrinsic<[llvm_v2i64_ty], - [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogq128">, + 
DefaultAttrsIntrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_256 : - ClangBuiltin<"__builtin_ia32_pternlogq256">, - Intrinsic<[llvm_v4i64_ty], - [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogq256">, + DefaultAttrsIntrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_pternlog_q_512 : - ClangBuiltin<"__builtin_ia32_pternlogq512">, - Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_pternlogq512">, + DefaultAttrsIntrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, + llvm_i32_ty], + [IntrNoMem, ImmArg>]>; } // vp2intersect let TargetPrefix = "x86" in { def int_x86_avx512_vp2intersect_q_512 : - Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty], - [llvm_v8i64_ty, llvm_v8i64_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i1_ty, llvm_v8i1_ty], + [llvm_v8i64_ty, llvm_v8i64_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_q_256 : - Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty], - [llvm_v4i64_ty, llvm_v4i64_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i1_ty, llvm_v4i1_ty], + [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_q_128 : - Intrinsic<[llvm_v2i1_ty, llvm_v2i1_ty], - [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v2i1_ty, llvm_v2i1_ty], + [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_d_512 : - Intrinsic<[llvm_v16i1_ty, llvm_v16i1_ty], - [llvm_v16i32_ty, llvm_v16i32_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i1_ty, llvm_v16i1_ty], + [llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_d_256 : - Intrinsic<[llvm_v8i1_ty, llvm_v8i1_ty], - [llvm_v8i32_ty, llvm_v8i32_ty], - 
[IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8i1_ty, llvm_v8i1_ty], + [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; def int_x86_avx512_vp2intersect_d_128 : - Intrinsic<[llvm_v4i1_ty, llvm_v4i1_ty], - [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i1_ty, llvm_v4i1_ty], + [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; } // Misc. @@ -5009,57 +5017,70 @@ let TargetPrefix = "x86" in { // NOTE: These comparison intrinsics are not used by clang as long as the // distinction in signaling behaviour is not implemented. def int_x86_avx512_mask_cmp_ps_512 : - Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_v16i1_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i32_ty, llvm_v16i1_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; def int_x86_avx512_mask_cmp_pd_512 : - Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty, llvm_v8i1_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i32_ty, llvm_v8i1_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; def int_x86_avx512_mask_cmp_ps_256 : - Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i32_ty, llvm_v8i1_ty], [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i32_ty, llvm_v8i1_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_pd_256 : - Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i32_ty, llvm_v4i1_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_ps_128 : - Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty, 
llvm_v4i1_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_pd_128 : - Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty, llvm_v2i1_ty], [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty, llvm_v2i1_ty], + [IntrNoMem, ImmArg>]>; def int_x86_avx512_mask_cmp_ss : - ClangBuiltin<"__builtin_ia32_cmpss_mask">, - Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_cmpss_mask">, + DefaultAttrsIntrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; def int_x86_avx512_mask_cmp_sd : - ClangBuiltin<"__builtin_ia32_cmpsd_mask">, - Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; + ClangBuiltin<"__builtin_ia32_cmpsd_mask">, + DefaultAttrsIntrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>, + ImmArg>]>; } //===----------------------------------------------------------------------===// // SHA intrinsics let TargetPrefix = "x86" in { def int_x86_sha1rnds4 : ClangBuiltin<"__builtin_ia32_sha1rnds4">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem, ImmArg>]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_i8_ty], [IntrNoMem, ImmArg>]>; def int_x86_sha1nexte : ClangBuiltin<"__builtin_ia32_sha1nexte">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sha1msg1 : ClangBuiltin<"__builtin_ia32_sha1msg1">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def 
int_x86_sha1msg2 : ClangBuiltin<"__builtin_ia32_sha1msg2">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sha256rnds2 : ClangBuiltin<"__builtin_ia32_sha256rnds2">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; def int_x86_sha256msg1 : ClangBuiltin<"__builtin_ia32_sha256msg1">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; def int_x86_sha256msg2 : ClangBuiltin<"__builtin_ia32_sha256msg2">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -5144,40 +5165,43 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { def int_x86_avx512bf16_cvtne2ps2bf16_128: - ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">, - Intrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; def int_x86_avx512bf16_cvtne2ps2bf16_256: - ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">, - Intrinsic<[llvm_v16bf16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; def int_x86_avx512bf16_cvtne2ps2bf16_512: - ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">, - Intrinsic<[llvm_v32bf16_ty], [llvm_v16f32_ty, llvm_v16f32_ty], - [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">, + 
DefaultAttrsIntrinsic<[llvm_v32bf16_ty], [llvm_v16f32_ty, llvm_v16f32_ty], + [IntrNoMem]>; // Intrinsic must be masked due to it producing less than 128 bits of results. def int_x86_avx512bf16_mask_cvtneps2bf16_128: - Intrinsic<[llvm_v8bf16_ty], - [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v4i1_ty], - [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], + [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v4i1_ty], + [IntrNoMem]>; def int_x86_avx512bf16_cvtneps2bf16_256: - ClangBuiltin<"__builtin_ia32_cvtneps2bf16_256">, - Intrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtneps2bf16_256">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [IntrNoMem]>; def int_x86_avx512bf16_cvtneps2bf16_512: - ClangBuiltin<"__builtin_ia32_cvtneps2bf16_512">, - Intrinsic<[llvm_v16bf16_ty], [llvm_v16f32_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_cvtneps2bf16_512">, + DefaultAttrsIntrinsic<[llvm_v16bf16_ty], [llvm_v16f32_ty], [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_128: - ClangBuiltin<"__builtin_ia32_dpbf16ps_128">, - Intrinsic<[llvm_v4f32_ty], - [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_dpbf16ps_128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_256: - ClangBuiltin<"__builtin_ia32_dpbf16ps_256">, - Intrinsic<[llvm_v8f32_ty], - [llvm_v8f32_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_dpbf16ps_256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v16bf16_ty, llvm_v16bf16_ty], + [IntrNoMem]>; def int_x86_avx512bf16_dpbf16ps_512: - ClangBuiltin<"__builtin_ia32_dpbf16ps_512">, - Intrinsic<[llvm_v16f32_ty], - [llvm_v16f32_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], [IntrNoMem]>; + ClangBuiltin<"__builtin_ia32_dpbf16ps_512">, + DefaultAttrsIntrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v32bf16_ty, llvm_v32bf16_ty], + [IntrNoMem]>; } 
//===----------------------------------------------------------------------===// @@ -5301,6 +5325,11 @@ let TargetPrefix = "x86" in { Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], [ImmArg>, ImmArg>, ImmArg>]>; + // AMX-FP16 - Intel FP16 AMX extensions + def int_x86_tdpfp16ps : ClangBuiltin<"__builtin_ia32_tdpfp16ps">, + Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg>, ImmArg>, + ImmArg>]>; // AMX - internal intrinsics def int_x86_ldtilecfg_internal : ClangBuiltin<"__builtin_ia32_tile_loadconfig_internal">, @@ -5354,12 +5383,14 @@ let TargetPrefix = "x86" in { llvm_x86amx_ty, llvm_x86amx_ty, llvm_x86amx_ty], []>; def int_x86_cast_vector_to_tile: - Intrinsic<[llvm_x86amx_ty], [llvm_anyvector_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_x86amx_ty], [llvm_anyvector_ty], [IntrNoMem]>; def int_x86_cast_tile_to_vector: - Intrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>; } +//===----------------------------------------------------------------------===// let TargetPrefix = "x86" in { +// CMPCCXADD def int_x86_cmpccxadd32 : ClangBuiltin<"__builtin_ia32_cmpccxadd32">, Intrinsic<[llvm_i32_ty], @@ -5371,62 +5402,90 @@ def int_x86_cmpccxadd64 Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], [IntrArgMemOnly, ImmArg>]>; -} -//===----------------------------------------------------------------------===// -let TargetPrefix = "x86" in { -// AMX_FP16 - Intel FP16 AMX extensions - def int_x86_tdpfp16ps : ClangBuiltin<"__builtin_ia32_tdpfp16ps">, - Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], - [ImmArg>, - ImmArg>, ImmArg>]>; -def int_x86_vbcstnebf162ps128 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vbcstnebf162ps256 : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vbcstnesh2ps128 : 
ClangBuiltin<"__builtin_ia32_vbcstnesh2ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vbcstnesh2ps256 : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneebf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneebf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneeph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneeph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneobf162ps128 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneobf162ps256 : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneoph2ps128 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps128">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneoph2ps256 : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -def int_x86_vcvtneps2bf16128 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16128">, - Intrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [ IntrNoMem ]>; -def int_x86_vcvtneps2bf16256 : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16256">, - Intrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [ IntrNoMem ]>; + +// AVX-NE-CONVERT +def int_x86_vbcstnebf162ps128 + : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vbcstnebf162ps256 + : ClangBuiltin<"__builtin_ia32_vbcstnebf162ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], 
+ [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vbcstnesh2ps128 + : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vbcstnesh2ps256 + : ClangBuiltin<"__builtin_ia32_vbcstnesh2ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneebf162ps128 + : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneebf162ps256 + : ClangBuiltin<"__builtin_ia32_vcvtneebf162ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneeph2ps128 + : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneeph2ps256 + : ClangBuiltin<"__builtin_ia32_vcvtneeph2ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneobf162ps128 + : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneobf162ps256 + : ClangBuiltin<"__builtin_ia32_vcvtneobf162ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneoph2ps128 + : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps128">, + DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneoph2ps256 + : ClangBuiltin<"__builtin_ia32_vcvtneoph2ps256">, + DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_x86_vcvtneps2bf16128 + : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16128">, + DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_x86_vcvtneps2bf16256 + : ClangBuiltin<"__builtin_ia32_vcvtneps2bf16256">, 
+ DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // RAO-INT intrinsics let TargetPrefix = "x86" in { - def int_x86_aadd32 : ClangBuiltin<"__builtin_ia32_aadd32">, - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_aadd64 : ClangBuiltin<"__builtin_ia32_aadd64">, - Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - def int_x86_aand32 : ClangBuiltin<"__builtin_ia32_aand32">, - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_aand64 : ClangBuiltin<"__builtin_ia32_aand64">, - Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - def int_x86_aor32 : ClangBuiltin<"__builtin_ia32_aor32">, - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_aor64 : ClangBuiltin<"__builtin_ia32_aor64">, - Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; - def int_x86_axor32 : ClangBuiltin<"__builtin_ia32_axor32">, - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], []>; - def int_x86_axor64 : ClangBuiltin<"__builtin_ia32_axor64">, - Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>; + def int_x86_aadd32 + : ClangBuiltin<"__builtin_ia32_aadd32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_aadd64 + : ClangBuiltin<"__builtin_ia32_aadd64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrArgMemOnly]>; + def int_x86_aand32 + : ClangBuiltin<"__builtin_ia32_aand32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_aand64 + : ClangBuiltin<"__builtin_ia32_aand64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrArgMemOnly]>; + def int_x86_aor32 + : ClangBuiltin<"__builtin_ia32_aor32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_aor64 + : ClangBuiltin<"__builtin_ia32_aor64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrArgMemOnly]>; + def int_x86_axor32 + : ClangBuiltin<"__builtin_ia32_axor32">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly]>; + def int_x86_axor64 + 
: ClangBuiltin<"__builtin_ia32_axor64">, + Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrArgMemOnly]>; } //===----------------------------------------------------------------------===// @@ -5448,751 +5507,848 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { def int_x86_avx512fp16_add_ph_512 : ClangBuiltin<"__builtin_ia32_addph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_sub_ph_512 : ClangBuiltin<"__builtin_ia32_subph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mul_ph_512 : ClangBuiltin<"__builtin_ia32_mulph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_div_ph_512 : ClangBuiltin<"__builtin_ia32_divph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_max_ph_128 : ClangBuiltin<"__builtin_ia32_maxph128">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_max_ph_256 : ClangBuiltin<"__builtin_ia32_maxph256">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, 
llvm_v16f16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_max_ph_512 : ClangBuiltin<"__builtin_ia32_maxph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_min_ph_128 : ClangBuiltin<"__builtin_ia32_minph128">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; def int_x86_avx512fp16_min_ph_256 : ClangBuiltin<"__builtin_ia32_minph256">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_min_ph_512 : ClangBuiltin<"__builtin_ia32_minph512">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_ph_512 - : Intrinsic<[ llvm_v32i1_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_v32i1_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32i1_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, + llvm_v32i1_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_ph_256 - : Intrinsic<[ llvm_v16i1_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty, llvm_v16i1_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v16i1_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty, + llvm_v16i1_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_ph_128 - : Intrinsic<[ llvm_v8i1_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8i1_ty ], - [ IntrNoMem, ImmArg> ]>; + : 
DefaultAttrsIntrinsic<[ llvm_v8i1_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, + llvm_v8i1_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_add_sh_round : ClangBuiltin<"__builtin_ia32_addsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_sub_sh_round : ClangBuiltin<"__builtin_ia32_subsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_mul_sh_round : ClangBuiltin<"__builtin_ia32_mulsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_div_sh_round : ClangBuiltin<"__builtin_ia32_divsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_min_sh_round : ClangBuiltin<"__builtin_ia32_minsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, 
ImmArg> ]>; def int_x86_avx512fp16_mask_max_sh_round : ClangBuiltin<"__builtin_ia32_maxsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_cmp_sh : ClangBuiltin<"__builtin_ia32_cmpsh_mask">, - Intrinsic<[ llvm_i8_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i8_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_vcomi_sh : ClangBuiltin<"__builtin_ia32_vcomish">, - Intrinsic<[ llvm_i32_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2psx_128 : ClangBuiltin<"__builtin_ia32_vcvtph2psx128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2psx_256 : ClangBuiltin<"__builtin_ia32_vcvtph2psx256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2psx_512 : ClangBuiltin<"__builtin_ia32_vcvtph2psx512_mask">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f16_ty, 
llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtps2phx_128 : ClangBuiltin<"__builtin_ia32_vcvtps2phx128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtps2phx_256 : ClangBuiltin<"__builtin_ia32_vcvtps2phx256_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtps2phx_512 : ClangBuiltin<"__builtin_ia32_vcvtps2phx512_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtpd2ph_512 : ClangBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def 
int_x86_avx512fp16_mask_vcvtph2pd_128 : ClangBuiltin<"__builtin_ia32_vcvtph2pd128_mask">, - Intrinsic<[ llvm_v2f64_ty ], - [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2f64_ty ], + [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2pd_256 : ClangBuiltin<"__builtin_ia32_vcvtph2pd256_mask">, - Intrinsic<[ llvm_v4f64_ty ], - [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f64_ty ], + [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2pd_512 : ClangBuiltin<"__builtin_ia32_vcvtph2pd512_mask">, - Intrinsic<[ llvm_v8f64_ty ], - [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f64_ty ], + [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsh2ss_round : ClangBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtss2sh_round : ClangBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsd2sh_round : ClangBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ 
llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtsh2sd_round : ClangBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">, - Intrinsic<[ llvm_v2f64_ty ], - [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v2f64_ty ], + [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2w_128 : ClangBuiltin<"__builtin_ia32_vcvtph2w128_mask">, - Intrinsic<[ llvm_v8i16_ty ], - [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2w_256 : ClangBuiltin<"__builtin_ia32_vcvtph2w256_mask">, - Intrinsic<[ llvm_v16i16_ty ], - [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2w_512 : ClangBuiltin<"__builtin_ia32_vcvtph2w512_mask">, - Intrinsic<[ llvm_v32i16_ty ], - [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2w_128 : ClangBuiltin<"__builtin_ia32_vcvttph2w128_mask">, - Intrinsic<[ llvm_v8i16_ty ], - [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2w_256 : ClangBuiltin<"__builtin_ia32_vcvttph2w256_mask">, - Intrinsic<[ llvm_v16i16_ty ], - [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, 
llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2w_512 : ClangBuiltin<"__builtin_ia32_vcvttph2w512_mask">, - Intrinsic<[ llvm_v32i16_ty ], - [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2uw_128 : ClangBuiltin<"__builtin_ia32_vcvtph2uw128_mask">, - Intrinsic<[ llvm_v8i16_ty ], - [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uw_256 : ClangBuiltin<"__builtin_ia32_vcvtph2uw256_mask">, - Intrinsic<[ llvm_v16i16_ty ], - [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uw_512 : ClangBuiltin<"__builtin_ia32_vcvtph2uw512_mask">, - Intrinsic<[ llvm_v32i16_ty ], - [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2uw_128 : ClangBuiltin<"__builtin_ia32_vcvttph2uw128_mask">, - Intrinsic<[ llvm_v8i16_ty ], - [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uw_256 : ClangBuiltin<"__builtin_ia32_vcvttph2uw256_mask">, - Intrinsic<[ llvm_v16i16_ty ], - [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def 
int_x86_avx512fp16_mask_vcvttph2uw_512 : ClangBuiltin<"__builtin_ia32_vcvttph2uw512_mask">, - Intrinsic<[ llvm_v32i16_ty ], - [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2dq_128 : ClangBuiltin<"__builtin_ia32_vcvtph2dq128_mask">, - Intrinsic<[ llvm_v4i32_ty ], - [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2dq_256 : ClangBuiltin<"__builtin_ia32_vcvtph2dq256_mask">, - Intrinsic<[ llvm_v8i32_ty ], - [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2dq_512 : ClangBuiltin<"__builtin_ia32_vcvtph2dq512_mask">, - Intrinsic<[ llvm_v16i32_ty ], - [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtph2udq_128 : ClangBuiltin<"__builtin_ia32_vcvtph2udq128_mask">, - Intrinsic<[ llvm_v4i32_ty ], - [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2udq_256 : ClangBuiltin<"__builtin_ia32_vcvtph2udq256_mask">, - Intrinsic<[ llvm_v8i32_ty ], - [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2udq_512 : ClangBuiltin<"__builtin_ia32_vcvtph2udq512_mask">, - 
Intrinsic<[ llvm_v16i32_ty ], - [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtdq2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtudq2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_128 : ClangBuiltin<"__builtin_ia32_vcvttph2dq128_mask">, - Intrinsic<[ llvm_v4i32_ty ], - [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_256 : ClangBuiltin<"__builtin_ia32_vcvttph2dq256_mask">, - Intrinsic<[ llvm_v8i32_ty ], - [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2dq_512 : ClangBuiltin<"__builtin_ia32_vcvttph2dq512_mask">, - Intrinsic<[ llvm_v16i32_ty ], - [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2udq_128 : ClangBuiltin<"__builtin_ia32_vcvttph2udq128_mask">, - Intrinsic<[ llvm_v4i32_ty ], - [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ 
IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2udq_256 : ClangBuiltin<"__builtin_ia32_vcvttph2udq256_mask">, - Intrinsic<[ llvm_v8i32_ty ], - [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2udq_512 : ClangBuiltin<"__builtin_ia32_vcvttph2udq512_mask">, - Intrinsic<[ llvm_v16i32_ty ], - [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtqq2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtqq2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2qq_128 : ClangBuiltin<"__builtin_ia32_vcvtph2qq128_mask">, - Intrinsic<[ llvm_v2i64_ty ], - [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2qq_256 : ClangBuiltin<"__builtin_ia32_vcvtph2qq256_mask">, - Intrinsic<[ llvm_v4i64_ty ], - [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def 
int_x86_avx512fp16_mask_vcvtph2qq_512 : ClangBuiltin<"__builtin_ia32_vcvtph2qq512_mask">, - Intrinsic<[ llvm_v8i64_ty ], - [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvtuqq2ph_128 : ClangBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtuqq2ph_256 : ClangBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_128 : ClangBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">, - Intrinsic<[ llvm_v2i64_ty ], - [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_256 : ClangBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">, - Intrinsic<[ llvm_v4i64_ty ], - [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvtph2uqq_512 : ClangBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">, - Intrinsic<[ llvm_v8i64_ty ], - [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2qq_128 : ClangBuiltin<"__builtin_ia32_vcvttph2qq128_mask">, - Intrinsic<[ 
llvm_v2i64_ty ], - [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2qq_256 : ClangBuiltin<"__builtin_ia32_vcvttph2qq256_mask">, - Intrinsic<[ llvm_v4i64_ty ], - [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2qq_512 : ClangBuiltin<"__builtin_ia32_vcvttph2qq512_mask">, - Intrinsic<[ llvm_v8i64_ty ], - [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_128 : ClangBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">, - Intrinsic<[ llvm_v2i64_ty ], - [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_256 : ClangBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">, - Intrinsic<[ llvm_v4i64_ty ], - [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vcvttph2uqq_512 : ClangBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">, - Intrinsic<[ llvm_v8i64_ty ], - [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2si32 : ClangBuiltin<"__builtin_ia32_vcvtsh2si32">, - Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty 
], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2usi32 : ClangBuiltin<"__builtin_ia32_vcvtsh2usi32">, - Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2si64 : ClangBuiltin<"__builtin_ia32_vcvtsh2si64">, - Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsh2usi64 : ClangBuiltin<"__builtin_ia32_vcvtsh2usi64">, - Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtusi2sh : ClangBuiltin<"__builtin_ia32_vcvtusi2sh">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtusi642sh : ClangBuiltin<"__builtin_ia32_vcvtusi642sh">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsi2sh : ClangBuiltin<"__builtin_ia32_vcvtsi2sh">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvtsi642sh : ClangBuiltin<"__builtin_ia32_vcvtsi642sh">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ 
llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2si32 : ClangBuiltin<"__builtin_ia32_vcvttsh2si32">, - Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2si64 : ClangBuiltin<"__builtin_ia32_vcvttsh2si64">, - Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2usi32 : ClangBuiltin<"__builtin_ia32_vcvttsh2usi32">, - Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vcvttsh2usi64 : ClangBuiltin<"__builtin_ia32_vcvttsh2usi64">, - Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_sqrt_ph_512 - : Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_sqrt_sh - : Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rsqrt_ph_128 : ClangBuiltin<"__builtin_ia32_rsqrtph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, 
llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_ph_256 : ClangBuiltin<"__builtin_ia32_rsqrtph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_ph_512 : ClangBuiltin<"__builtin_ia32_rsqrtph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rsqrt_sh : ClangBuiltin<"__builtin_ia32_rsqrtsh_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_128 : ClangBuiltin<"__builtin_ia32_rcpph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_256 : ClangBuiltin<"__builtin_ia32_rcpph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_ph_512 : ClangBuiltin<"__builtin_ia32_rcpph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_rcp_sh : ClangBuiltin<"__builtin_ia32_rcpsh_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ 
llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_reduce_ph_128 : ClangBuiltin<"__builtin_ia32_reduceph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_ph_256 : ClangBuiltin<"__builtin_ia32_reduceph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, + llvm_i16_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_ph_512 : ClangBuiltin<"__builtin_ia32_reduceph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_reduce_sh : ClangBuiltin<"__builtin_ia32_reducesh_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_fpclass_ph_128 - : Intrinsic<[ llvm_v8i1_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v8i1_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_fpclass_ph_256 - : 
Intrinsic<[ llvm_v16i1_ty ], [ llvm_v16f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v16i1_ty ], + [ llvm_v16f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_fpclass_ph_512 - : Intrinsic<[ llvm_v32i1_ty ], [ llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32i1_ty ], + [ llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_fpclass_sh : ClangBuiltin<"__builtin_ia32_fpclasssh_mask">, - Intrinsic<[ llvm_i8_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_i8_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getexp_ph_128 : ClangBuiltin<"__builtin_ia32_getexpph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_getexp_ph_256 : ClangBuiltin<"__builtin_ia32_getexpph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_getexp_ph_512 : ClangBuiltin<"__builtin_ia32_getexpph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getexp_sh : ClangBuiltin<"__builtin_ia32_getexpsh128_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, 
llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_ph_128 : ClangBuiltin<"__builtin_ia32_getmantph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_ph_256 : ClangBuiltin<"__builtin_ia32_getmantph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, + llvm_i16_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_ph_512 : ClangBuiltin<"__builtin_ia32_getmantph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_getmant_sh : ClangBuiltin<"__builtin_ia32_getmantsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, - llvm_i8_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, + llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_128 : ClangBuiltin<"__builtin_ia32_rndscaleph_128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_256 : 
ClangBuiltin<"__builtin_ia32_rndscaleph_256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, + llvm_i16_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_ph_512 : ClangBuiltin<"__builtin_ia32_rndscaleph_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_rndscale_sh : ClangBuiltin<"__builtin_ia32_rndscalesh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg>, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg>, + ImmArg> ]>; def int_x86_avx512fp16_mask_scalef_ph_128 : ClangBuiltin<"__builtin_ia32_scalefph128_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_scalef_ph_256 : ClangBuiltin<"__builtin_ia32_scalefph256_mask">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, + llvm_i16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_scalef_ph_512 : ClangBuiltin<"__builtin_ia32_scalefph512_mask">, - Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, 
llvm_i32_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_scalef_sh : ClangBuiltin<"__builtin_ia32_scalefsh_round_mask">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vfmadd_ph_512 - : Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vfmaddsub_ph_128 : ClangBuiltin<"__builtin_ia32_vfmaddsubph">, - Intrinsic<[ llvm_v8f16_ty ], - [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_vfmaddsub_ph_256 : ClangBuiltin<"__builtin_ia32_vfmaddsubph256">, - Intrinsic<[ llvm_v16f16_ty ], - [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, + llvm_v16f16_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_vfmaddsub_ph_512 - : Intrinsic<[ llvm_v32f16_ty ], - [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_vfmadd_f16 - : Intrinsic<[ llvm_half_ty ], - [ llvm_half_ty, llvm_half_ty, llvm_half_ty, llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + : DefaultAttrsIntrinsic<[ 
llvm_half_ty ], + [ llvm_half_ty, llvm_half_ty, llvm_half_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_128 : ClangBuiltin<"__builtin_ia32_vfcmaddcph128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_128 : ClangBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_256 : ClangBuiltin<"__builtin_ia32_vfcmaddcph256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_256 : ClangBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmadd_cph_512 : ClangBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfcmadd_cph_512 : ClangBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, 
llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmadd_cph_128 : ClangBuiltin<"__builtin_ia32_vfmaddcph128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfmadd_cph_128 : ClangBuiltin<"__builtin_ia32_vfmaddcph128_maskz">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmadd_cph_256 : ClangBuiltin<"__builtin_ia32_vfmaddcph256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_maskz_vfmadd_cph_256 : ClangBuiltin<"__builtin_ia32_vfmaddcph256_maskz">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmadd_cph_512 : ClangBuiltin<"__builtin_ia32_vfmaddcph512_mask3">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def 
int_x86_avx512fp16_maskz_vfmadd_cph_512 : ClangBuiltin<"__builtin_ia32_vfmaddcph512_maskz">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmadd_csh : ClangBuiltin<"__builtin_ia32_vfmaddcsh_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfmadd_csh : ClangBuiltin<"__builtin_ia32_vfmaddcsh_maskz">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmadd_csh : ClangBuiltin<"__builtin_ia32_vfcmaddcsh_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_maskz_vfcmadd_csh : ClangBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfmul_cph_128 : ClangBuiltin<"__builtin_ia32_vfmulcph128_mask">, - Intrinsic<[ 
llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmul_cph_128 : ClangBuiltin<"__builtin_ia32_vfcmulcph128_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmul_cph_256 : ClangBuiltin<"__builtin_ia32_vfmulcph256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfcmul_cph_256 : ClangBuiltin<"__builtin_ia32_vfcmulcph256_mask">, - Intrinsic<[ llvm_v8f32_ty ], - [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], - [ IntrNoMem ]>; + DefaultAttrsIntrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty ], + [ IntrNoMem ]>; def int_x86_avx512fp16_mask_vfmul_cph_512 : ClangBuiltin<"__builtin_ia32_vfmulcph512_mask">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmul_cph_512 : ClangBuiltin<"__builtin_ia32_vfcmulcph512_mask">, - Intrinsic<[ llvm_v16f32_ty ], - [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; 
def int_x86_avx512fp16_mask_vfmul_csh : ClangBuiltin<"__builtin_ia32_vfmulcsh_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; def int_x86_avx512fp16_mask_vfcmul_csh : ClangBuiltin<"__builtin_ia32_vfcmulcsh_mask">, - Intrinsic<[ llvm_v4f32_ty ], - [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, - llvm_i32_ty ], - [ IntrNoMem, ImmArg> ]>; + DefaultAttrsIntrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg> ]>; } diff --git a/llvm/include/llvm/IR/Metadata.def b/llvm/include/llvm/IR/Metadata.def index bbf349e6b508c..36c34c1d2347c 100644 --- a/llvm/include/llvm/IR/Metadata.def +++ b/llvm/include/llvm/IR/Metadata.def @@ -110,6 +110,7 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILabel) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity) +HANDLE_SPECIALIZED_MDNODE_LEAF(DIAssignID) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 98f6bea054314..f6d22e6e70be6 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -773,6 +773,9 @@ Expected ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { if (isExportedToOtherDSO(ESym)) Result |= SymbolRef::SF_Exported; + if (ESym->getType() == ELF::STT_GNU_IFUNC) + Result |= SymbolRef::SF_Indirect; + if (ESym->getVisibility() == ELF::STV_HIDDEN) Result |= SymbolRef::SF_Hidden; diff --git a/llvm/include/llvm/Passes/PassBuilder.h 
b/llvm/include/llvm/Passes/PassBuilder.h index 68f07f39afa63..d7d76c30d1a6b 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -75,6 +75,9 @@ class PipelineTuningOptions { /// false. bool MergeFunctions; + /// Tuning option to override the default inliner threshold. + int InlinerThreshold; + // Experimental option to eagerly invalidate more analyses. This has the // potential to decrease max memory usage in exchange for more compile time. // This may affect codegen due to either passes using analyses only when diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 2005638e38c3a..e1792a57e35e0 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -129,6 +129,7 @@ AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm") AARCH64_ARCH_EXT_NAME("sve2p1", AArch64::AEK_SVE2p1, "+sve2p1", "-sve2p1") +AARCH64_ARCH_EXT_NAME("b16b16", AArch64::AEK_B16B16, "+b16b16", "-b16b16") AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") @@ -148,6 +149,7 @@ AARCH64_ARCH_EXT_NAME("sme", AArch64::AEK_SME, "+sme", AARCH64_ARCH_EXT_NAME("sme-f64f64", AArch64::AEK_SMEF64F64, "+sme-f64f64", "-sme-f64f64") AARCH64_ARCH_EXT_NAME("sme-i16i64", AArch64::AEK_SMEI16I64, "+sme-i16i64", "-sme-i16i64") AARCH64_ARCH_EXT_NAME("sme2", AArch64::AEK_SME2, "+sme2", "-sme2") +AARCH64_ARCH_EXT_NAME("sme2p1", AArch64::AEK_SME2p1, "+sme2p1", "-sme2p1") AARCH64_ARCH_EXT_NAME("hbc", AArch64::AEK_HBC, "+hbc", "-hbc") AARCH64_ARCH_EXT_NAME("mops", AArch64::AEK_MOPS, "+mops", "-mops") 
AARCH64_ARCH_EXT_NAME("pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon") @@ -204,6 +206,12 @@ AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML | AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16)) +AARCH64_CPU_NAME("cortex-a715", ARMV9A, FK_NEON_FP_ARMV8, false, + (AArch64::AEK_SB | AArch64::AEK_SSBS | AArch64::AEK_MTE | + AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_PAUTH | + AArch64::AEK_I8MM | AArch64::AEK_PREDRES | AArch64::AEK_PERFMON | + AArch64::AEK_PROFILE | AArch64::AEK_SVE | AArch64::AEK_SVE2BITPERM | + AArch64::AEK_BF16 | AArch64::AEK_FLAGM)) AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_LSE)) AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h index 35b28413a88f7..24ffb9195454a 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -74,6 +74,8 @@ enum ArchExtKind : uint64_t { AEK_PERFMON = 1ULL << 42, // FEAT_PMUv3 AEK_SME2 = 1ULL << 43, // FEAT_SME2 AEK_SVE2p1 = 1ULL << 44, // FEAT_SVE2p1 + AEK_SME2p1 = 1ULL << 45, // FEAT_SME2p1 + AEK_B16B16 = 1ULL << 46 // FEAT_B16B16 }; enum class ArchKind { diff --git a/llvm/include/llvm/Support/Format.h b/llvm/include/llvm/Support/Format.h index 9dd7b401b46a2..386f97488b5ca 100644 --- a/llvm/include/llvm/Support/Format.h +++ b/llvm/include/llvm/Support/Format.h @@ -33,11 +33,63 @@ namespace llvm { +/// Utility class that parses printf-style format strings to yield the expected +/// C type(s) of each specifier. This class is used to verify that a format +/// string unknown at compile-time is equivalent to another format string (which +/// itself is hopefully known at compile-time). 
+class PrintfStyleFormatReader { +public: + enum SpecifierType : char { + ST_EndOfFormatString, + ST_Unknown, + ST_WideChar, + ST_Int, + ST_Long, + ST_LongLong, + ST_IntMax, + ST_Size, + ST_Ptrdiff, + ST_Double, + ST_LongDouble, + ST_CString, + ST_WideCString, + ST_VoidPointer, + ST_Count_Char, + ST_Count_Short, + ST_Count_Int, + ST_Count_Long, + ST_Count_LongLong, + ST_Count_IntMax, + ST_Count_Size, + ST_Count_Ptrdiff + }; + +private: + const char *Fmt; + llvm::SmallVector SpecifierQueue; + + void refillSpecifierQueue(); + +public: + /// Verify that the format specifiers in \p Fmt consume no more arguments than + /// those in \p Expected, and that all consumed arguments have a compatible + /// type. If \p Fmt is compatible with \p Expected in this way, \p Fmt is + /// returned. Otherwise, \p Expected is returned. + static const char *ensureCompatible(const char *Expected, const char *Fmt); + + PrintfStyleFormatReader(const char *Fmt) : Fmt(Fmt) {} + + SpecifierType nextSpecifier() { + if (SpecifierQueue.empty()) + refillSpecifierQueue(); + return SpecifierQueue.pop_back_val(); + } +}; + /// This is a helper class used for handling formatted output. It is the /// abstract base class of a templated derived class. class format_object_base { protected: - const char *Fmt; ~format_object_base() = default; // Disallow polymorphic deletion. format_object_base(const format_object_base &) = default; virtual void home(); // Out of line virtual method. @@ -46,7 +98,7 @@ class format_object_base { virtual int snprint(char *Buffer, unsigned BufferSize) const = 0; public: - format_object_base(const char *fmt) : Fmt(fmt) {} + format_object_base() = default; /// Format the object into the specified buffer. On success, this returns /// the length of the formatted string. 
If the buffer is too small, this @@ -86,28 +138,27 @@ struct validate_format_parameters { }; template <> struct validate_format_parameters<> {}; -template -class format_object final : public format_object_base { - std::tuple Vals; - - template - int snprint_tuple(char *Buffer, unsigned BufferSize, - std::index_sequence) const { +template auto format_capture(const char *Fmt, Ts... Vals) { + validate_format_parameters(); + return [=](char *Buffer, unsigned BufferSize) { #ifdef _MSC_VER - return _snprintf(Buffer, BufferSize, Fmt, std::get(Vals)...); + return _snprintf(Buffer, BufferSize, Fmt, Vals...); #else - return snprintf(Buffer, BufferSize, Fmt, std::get(Vals)...); + return snprintf(Buffer, BufferSize, Fmt, Vals...); #endif - } + }; +} + +template +class format_object final : public format_object_base { + decltype(format_capture("", std::declval()...)) Format; public: - format_object(const char *fmt, const Ts &... vals) - : format_object_base(fmt), Vals(vals...) { - validate_format_parameters(); - } + format_object(const char *Fmt, const Ts &...vals) + : Format(format_capture(Fmt, vals...)) {} int snprint(char *Buffer, unsigned BufferSize) const override { - return snprint_tuple(Buffer, BufferSize, std::index_sequence_for()); + return Format(Buffer, BufferSize); } }; diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index 3e263a7739532..1fe4128516adc 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -74,118 +74,119 @@ namespace llvm { v256i1 = 25, // 256 x i1 v512i1 = 26, // 512 x i1 v1024i1 = 27, // 1024 x i1 - - v128i2 = 28, // 128 x i2 - v256i2 = 29, // 256 x i2 - - v64i4 = 30, // 64 x i4 - v128i4 = 31, // 128 x i4 - - v1i8 = 32, // 1 x i8 - v2i8 = 33, // 2 x i8 - v4i8 = 34, // 4 x i8 - v8i8 = 35, // 8 x i8 - v16i8 = 36, // 16 x i8 - v32i8 = 37, // 32 x i8 - v64i8 = 38, // 64 x i8 - v128i8 = 39, // 128 x i8 - v256i8 = 40, // 256 x i8 - v512i8 = 
41, // 512 x i8 - v1024i8 = 42, // 1024 x i8 - - v1i16 = 43, // 1 x i16 - v2i16 = 44, // 2 x i16 - v3i16 = 45, // 3 x i16 - v4i16 = 46, // 4 x i16 - v8i16 = 47, // 8 x i16 - v16i16 = 48, // 16 x i16 - v32i16 = 49, // 32 x i16 - v64i16 = 50, // 64 x i16 - v128i16 = 51, // 128 x i16 - v256i16 = 52, // 256 x i16 - v512i16 = 53, // 512 x i16 - - v1i32 = 54, // 1 x i32 - v2i32 = 55, // 2 x i32 - v3i32 = 56, // 3 x i32 - v4i32 = 57, // 4 x i32 - v5i32 = 58, // 5 x i32 - v6i32 = 59, // 6 x i32 - v7i32 = 60, // 7 x i32 - v8i32 = 61, // 8 x i32 - v16i32 = 62, // 16 x i32 - v32i32 = 63, // 32 x i32 - v64i32 = 64, // 64 x i32 - v128i32 = 65, // 128 x i32 - v256i32 = 66, // 256 x i32 - v512i32 = 67, // 512 x i32 - v1024i32 = 68, // 1024 x i32 - v2048i32 = 69, // 2048 x i32 - - v1i64 = 70, // 1 x i64 - v2i64 = 71, // 2 x i64 - v3i64 = 72, // 3 x i64 - v4i64 = 73, // 4 x i64 - v8i64 = 74, // 8 x i64 - v16i64 = 75, // 16 x i64 - v32i64 = 76, // 32 x i64 - v64i64 = 77, // 64 x i64 - v128i64 = 78, // 128 x i64 - v256i64 = 79, // 256 x i64 - - v1i128 = 80, // 1 x i128 + v2048i1 = 28, // 2048 x i1 + + v128i2 = 29, // 128 x i2 + v256i2 = 30, // 256 x i2 + + v64i4 = 31, // 64 x i4 + v128i4 = 32, // 128 x i4 + + v1i8 = 33, // 1 x i8 + v2i8 = 34, // 2 x i8 + v4i8 = 35, // 4 x i8 + v8i8 = 36, // 8 x i8 + v16i8 = 37, // 16 x i8 + v32i8 = 38, // 32 x i8 + v64i8 = 39, // 64 x i8 + v128i8 = 40, // 128 x i8 + v256i8 = 41, // 256 x i8 + v512i8 = 42, // 512 x i8 + v1024i8 = 43, // 1024 x i8 + + v1i16 = 44, // 1 x i16 + v2i16 = 45, // 2 x i16 + v3i16 = 46, // 3 x i16 + v4i16 = 47, // 4 x i16 + v8i16 = 48, // 8 x i16 + v16i16 = 49, // 16 x i16 + v32i16 = 50, // 32 x i16 + v64i16 = 51, // 64 x i16 + v128i16 = 52, // 128 x i16 + v256i16 = 53, // 256 x i16 + v512i16 = 54, // 512 x i16 + + v1i32 = 55, // 1 x i32 + v2i32 = 56, // 2 x i32 + v3i32 = 57, // 3 x i32 + v4i32 = 58, // 4 x i32 + v5i32 = 59, // 5 x i32 + v6i32 = 60, // 6 x i32 + v7i32 = 61, // 7 x i32 + v8i32 = 62, // 8 x i32 + v16i32 = 63, // 
16 x i32 + v32i32 = 64, // 32 x i32 + v64i32 = 65, // 64 x i32 + v128i32 = 66, // 128 x i32 + v256i32 = 67, // 256 x i32 + v512i32 = 68, // 512 x i32 + v1024i32 = 69, // 1024 x i32 + v2048i32 = 70, // 2048 x i32 + + v1i64 = 71, // 1 x i64 + v2i64 = 72, // 2 x i64 + v3i64 = 73, // 3 x i64 + v4i64 = 74, // 4 x i64 + v8i64 = 75, // 8 x i64 + v16i64 = 76, // 16 x i64 + v32i64 = 77, // 32 x i64 + v64i64 = 78, // 64 x i64 + v128i64 = 79, // 128 x i64 + v256i64 = 80, // 256 x i64 + + v1i128 = 81, // 1 x i128 FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, - v1f16 = 81, // 1 x f16 - v2f16 = 82, // 2 x f16 - v3f16 = 83, // 3 x f16 - v4f16 = 84, // 4 x f16 - v8f16 = 85, // 8 x f16 - v16f16 = 86, // 16 x f16 - v32f16 = 87, // 32 x f16 - v64f16 = 88, // 64 x f16 - v128f16 = 89, // 128 x f16 - v256f16 = 90, // 256 x f16 - v512f16 = 91, // 512 x f16 - - v2bf16 = 92, // 2 x bf16 - v3bf16 = 93, // 3 x bf16 - v4bf16 = 94, // 4 x bf16 - v8bf16 = 95, // 8 x bf16 - v16bf16 = 96, // 16 x bf16 - v32bf16 = 97, // 32 x bf16 - v64bf16 = 98, // 64 x bf16 - v128bf16 = 99, // 128 x bf16 - - v1f32 = 100, // 1 x f32 - v2f32 = 101, // 2 x f32 - v3f32 = 102, // 3 x f32 - v4f32 = 103, // 4 x f32 - v5f32 = 104, // 5 x f32 - v6f32 = 105, // 6 x f32 - v7f32 = 106, // 7 x f32 - v8f32 = 107, // 8 x f32 - v16f32 = 108, // 16 x f32 - v32f32 = 109, // 32 x f32 - v64f32 = 110, // 64 x f32 - v128f32 = 111, // 128 x f32 - v256f32 = 112, // 256 x f32 - v512f32 = 113, // 512 x f32 - v1024f32 = 114, // 1024 x f32 - v2048f32 = 115, // 2048 x f32 - - v1f64 = 116, // 1 x f64 - v2f64 = 117, // 2 x f64 - v3f64 = 118, // 3 x f64 - v4f64 = 119, // 4 x f64 - v8f64 = 120, // 8 x f64 - v16f64 = 121, // 16 x f64 - v32f64 = 122, // 32 x f64 - v64f64 = 123, // 64 x f64 - v128f64 = 124, // 128 x f64 - v256f64 = 125, // 256 x f64 + v1f16 = 82, // 1 x f16 + v2f16 = 83, // 2 x f16 + v3f16 = 84, // 3 x f16 + v4f16 = 85, // 4 x f16 + v8f16 = 86, // 8 x f16 + v16f16 = 87, // 16 x 
f16 + v32f16 = 88, // 32 x f16 + v64f16 = 89, // 64 x f16 + v128f16 = 90, // 128 x f16 + v256f16 = 91, // 256 x f16 + v512f16 = 92, // 512 x f16 + + v2bf16 = 93, // 2 x bf16 + v3bf16 = 94, // 3 x bf16 + v4bf16 = 95, // 4 x bf16 + v8bf16 = 96, // 8 x bf16 + v16bf16 = 97, // 16 x bf16 + v32bf16 = 98, // 32 x bf16 + v64bf16 = 99, // 64 x bf16 + v128bf16 = 100, // 128 x bf16 + + v1f32 = 101, // 1 x f32 + v2f32 = 102, // 2 x f32 + v3f32 = 103, // 3 x f32 + v4f32 = 104, // 4 x f32 + v5f32 = 105, // 5 x f32 + v6f32 = 106, // 6 x f32 + v7f32 = 107, // 7 x f32 + v8f32 = 108, // 8 x f32 + v16f32 = 109, // 16 x f32 + v32f32 = 110, // 32 x f32 + v64f32 = 111, // 64 x f32 + v128f32 = 112, // 128 x f32 + v256f32 = 113, // 256 x f32 + v512f32 = 114, // 512 x f32 + v1024f32 = 115, // 1024 x f32 + v2048f32 = 116, // 2048 x f32 + + v1f64 = 117, // 1 x f64 + v2f64 = 118, // 2 x f64 + v3f64 = 119, // 3 x f64 + v4f64 = 120, // 4 x f64 + v8f64 = 121, // 8 x f64 + v16f64 = 122, // 16 x f64 + v32f64 = 123, // 32 x f64 + v64f64 = 124, // 64 x f64 + v128f64 = 125, // 128 x f64 + v256f64 = 126, // 256 x f64 FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16, LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64, @@ -193,70 +194,70 @@ namespace llvm { FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64, - nxv1i1 = 126, // n x 1 x i1 - nxv2i1 = 127, // n x 2 x i1 - nxv4i1 = 128, // n x 4 x i1 - nxv8i1 = 129, // n x 8 x i1 - nxv16i1 = 130, // n x 16 x i1 - nxv32i1 = 131, // n x 32 x i1 - nxv64i1 = 132, // n x 64 x i1 - - nxv1i8 = 133, // n x 1 x i8 - nxv2i8 = 134, // n x 2 x i8 - nxv4i8 = 135, // n x 4 x i8 - nxv8i8 = 136, // n x 8 x i8 - nxv16i8 = 137, // n x 16 x i8 - nxv32i8 = 138, // n x 32 x i8 - nxv64i8 = 139, // n x 64 x i8 - - nxv1i16 = 140, // n x 1 x i16 - nxv2i16 = 141, // n x 2 x i16 - nxv4i16 = 142, // n x 4 x i16 - nxv8i16 = 143, // n x 8 x i16 - nxv16i16 = 144, // n x 16 x i16 - nxv32i16 = 145, // n x 32 x i16 - - nxv1i32 = 146, // n x 1 x i32 - nxv2i32 = 147, // n x 2 
x i32 - nxv4i32 = 148, // n x 4 x i32 - nxv8i32 = 149, // n x 8 x i32 - nxv16i32 = 150, // n x 16 x i32 - nxv32i32 = 151, // n x 32 x i32 - - nxv1i64 = 152, // n x 1 x i64 - nxv2i64 = 153, // n x 2 x i64 - nxv4i64 = 154, // n x 4 x i64 - nxv8i64 = 155, // n x 8 x i64 - nxv16i64 = 156, // n x 16 x i64 - nxv32i64 = 157, // n x 32 x i64 + nxv1i1 = 127, // n x 1 x i1 + nxv2i1 = 128, // n x 2 x i1 + nxv4i1 = 129, // n x 4 x i1 + nxv8i1 = 130, // n x 8 x i1 + nxv16i1 = 131, // n x 16 x i1 + nxv32i1 = 132, // n x 32 x i1 + nxv64i1 = 133, // n x 64 x i1 + + nxv1i8 = 134, // n x 1 x i8 + nxv2i8 = 135, // n x 2 x i8 + nxv4i8 = 136, // n x 4 x i8 + nxv8i8 = 137, // n x 8 x i8 + nxv16i8 = 138, // n x 16 x i8 + nxv32i8 = 139, // n x 32 x i8 + nxv64i8 = 140, // n x 64 x i8 + + nxv1i16 = 141, // n x 1 x i16 + nxv2i16 = 142, // n x 2 x i16 + nxv4i16 = 143, // n x 4 x i16 + nxv8i16 = 144, // n x 8 x i16 + nxv16i16 = 145, // n x 16 x i16 + nxv32i16 = 146, // n x 32 x i16 + + nxv1i32 = 147, // n x 1 x i32 + nxv2i32 = 148, // n x 2 x i32 + nxv4i32 = 149, // n x 4 x i32 + nxv8i32 = 150, // n x 8 x i32 + nxv16i32 = 151, // n x 16 x i32 + nxv32i32 = 152, // n x 32 x i32 + + nxv1i64 = 153, // n x 1 x i64 + nxv2i64 = 154, // n x 2 x i64 + nxv4i64 = 155, // n x 4 x i64 + nxv8i64 = 156, // n x 8 x i64 + nxv16i64 = 157, // n x 16 x i64 + nxv32i64 = 158, // n x 32 x i64 FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - nxv1f16 = 158, // n x 1 x f16 - nxv2f16 = 159, // n x 2 x f16 - nxv4f16 = 160, // n x 4 x f16 - nxv8f16 = 161, // n x 8 x f16 - nxv16f16 = 162, // n x 16 x f16 - nxv32f16 = 163, // n x 32 x f16 - - nxv1bf16 = 164, // n x 1 x bf16 - nxv2bf16 = 165, // n x 2 x bf16 - nxv4bf16 = 166, // n x 4 x bf16 - nxv8bf16 = 167, // n x 8 x bf16 - nxv16bf16 = 168, // n x 16 x bf16 - nxv32bf16 = 169, // n x 32 x bf16 - - nxv1f32 = 170, // n x 1 x f32 - nxv2f32 = 171, // n x 2 x f32 - nxv4f32 = 172, // n x 4 x f32 - nxv8f32 = 173, // n x 8 x 
f32 - nxv16f32 = 174, // n x 16 x f32 - - nxv1f64 = 175, // n x 1 x f64 - nxv2f64 = 176, // n x 2 x f64 - nxv4f64 = 177, // n x 4 x f64 - nxv8f64 = 178, // n x 8 x f64 + nxv1f16 = 159, // n x 1 x f16 + nxv2f16 = 160, // n x 2 x f16 + nxv4f16 = 161, // n x 4 x f16 + nxv8f16 = 162, // n x 8 x f16 + nxv16f16 = 163, // n x 16 x f16 + nxv32f16 = 164, // n x 32 x f16 + + nxv1bf16 = 165, // n x 1 x bf16 + nxv2bf16 = 166, // n x 2 x bf16 + nxv4bf16 = 167, // n x 4 x bf16 + nxv8bf16 = 168, // n x 8 x bf16 + nxv16bf16 = 169, // n x 16 x bf16 + nxv32bf16 = 170, // n x 32 x bf16 + + nxv1f32 = 171, // n x 1 x f32 + nxv2f32 = 172, // n x 2 x f32 + nxv4f32 = 173, // n x 4 x f32 + nxv8f32 = 174, // n x 8 x f32 + nxv16f32 = 175, // n x 16 x f32 + + nxv1f64 = 176, // n x 1 x f64 + nxv2f64 = 177, // n x 2 x f64 + nxv4f64 = 178, // n x 4 x f64 + nxv8f64 = 179, // n x 8 x f64 FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16, LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, @@ -267,20 +268,20 @@ namespace llvm { FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 179, // This is an X86 MMX value + x86mmx = 180, // This is an X86 MMX value - Glue = 180, // This glues nodes together during pre-RA sched + Glue = 181, // This glues nodes together during pre-RA sched - isVoid = 181, // This has no value + isVoid = 182, // This has no value - Untyped = 182, // This value takes a register, but has + Untyped = 183, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. - funcref = 183, // WebAssembly's funcref type - externref = 184, // WebAssembly's externref type - x86amx = 185, // This is an X86 AMX value - i64x8 = 186, // 8 Consecutive GPRs (AArch64) + funcref = 184, // WebAssembly's funcref type + externref = 185, // WebAssembly's externref type + x86amx = 186, // This is an X86 AMX value + i64x8 = 187, // 8 Consecutive GPRs (AArch64) FIRST_VALUETYPE = 1, // This is always the beginning of the list. 
LAST_VALUETYPE = i64x8, // This always remains at the end of the list. @@ -456,7 +457,8 @@ namespace llvm { return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 || SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64 || SimpleTy == MVT::v128f16 || SimpleTy == MVT::v64f32 || - SimpleTy == MVT::v32f64 || SimpleTy == MVT::v128bf16); + SimpleTy == MVT::v32f64 || SimpleTy == MVT::v128bf16 || + SimpleTy == MVT::v2048i1); } /// Return true if this is an overloaded type for TableGen. @@ -544,6 +546,7 @@ namespace llvm { case v256i1: case v512i1: case v1024i1: + case v2048i1: case nxv1i1: case nxv2i1: case nxv4i1: @@ -704,6 +707,7 @@ namespace llvm { switch (SimpleTy) { default: llvm_unreachable("Not a vector MVT!"); + case v2048i1: case v2048i32: case v2048f32: return 2048; case v1024i1: @@ -1054,6 +1058,7 @@ namespace llvm { case v16f64: return TypeSize::Fixed(1024); case nxv32i32: case nxv16i64: return TypeSize::Scalable(1024); + case v2048i1: case v256i8: case v128i16: case v64i32: @@ -1239,6 +1244,7 @@ namespace llvm { if (NumElements == 256) return MVT::v256i1; if (NumElements == 512) return MVT::v512i1; if (NumElements == 1024) return MVT::v1024i1; + if (NumElements == 2048) return MVT::v2048i1; break; case MVT::i2: if (NumElements == 128) return MVT::v128i2; diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index e9920a50bae8e..39222b02e21ad 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -157,6 +157,9 @@ IsaVersion getIsaVersion(StringRef GPU); namespace RISCV { +// We use 64 bits as the known part in the scalable vector types. 
+static constexpr unsigned RVVBitsPerBlock = 64; + enum CPUKind : unsigned { #define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) CK_##ENUM, #define TUNE_PROC(ENUM, NAME) CK_##ENUM, diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h index 44e133de854b8..7f2708ddbb467 100644 --- a/llvm/include/llvm/Support/Threading.h +++ b/llvm/include/llvm/Support/Threading.h @@ -26,14 +26,13 @@ #define LLVM_THREADING_USE_STD_CALL_ONCE 1 #elif defined(LLVM_ON_UNIX) && \ (defined(_LIBCPP_VERSION) || \ - !(defined(__NetBSD__) || defined(__OpenBSD__) || \ - (defined(__ppc__) || defined(__PPC__)))) + !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__powerpc__))) // std::call_once from libc++ is used on all Unix platforms. Other // implementations like libstdc++ are known to have problems on NetBSD, // OpenBSD and PowerPC. #define LLVM_THREADING_USE_STD_CALL_ONCE 1 #elif defined(LLVM_ON_UNIX) && \ - ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__)) + (defined(__powerpc__) && defined(__LITTLE_ENDIAN__)) #define LLVM_THREADING_USE_STD_CALL_ONCE 1 #else #define LLVM_THREADING_USE_STD_CALL_ONCE 0 diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index 0777005643a71..9cf2e873d7189 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -93,7 +93,7 @@ class LinearPolyBase { } template - friend typename std::enable_if_t::value, LeafTy> + friend std::enable_if_t::value, LeafTy> operator-(const LeafTy &LHS) { LeafTy Copy = LHS; return Copy *= -1; diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 6b6c740a9b1f8..8ffcc2152b1f1 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -60,6 +60,10 @@ X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm") #define X86_CPU_SUBTYPE(ENUM, STR) #endif +#ifndef X86_CPU_SUBTYPE_ALIAS +#define 
X86_CPU_SUBTYPE_ALIAS(ENUM, STR) +#endif + // This list must match what is implemented in libgcc and compilert-rt. Clang // uses this to know how to implement __builtin_cpu_is. X86_CPU_SUBTYPE(INTEL_COREI7_NEHALEM, "nehalem") @@ -89,6 +93,12 @@ X86_CPU_SUBTYPE(INTEL_COREI7_SAPPHIRERAPIDS, "sapphirerapids") X86_CPU_SUBTYPE(INTEL_COREI7_ALDERLAKE, "alderlake") X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3, "znver3") X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake") + +// Alternate names supported by __builtin_cpu_is and target multiversioning. +X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake") +X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "meteorlake") + +#undef X86_CPU_SUBTYPE_ALIAS #undef X86_CPU_SUBTYPE // This macro is used for cpu types present in compiler-rt/libgcc. The third diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h index 612046f3b2d9c..922be6d2e508d 100644 --- a/llvm/include/llvm/Support/X86TargetParser.h +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -104,6 +104,8 @@ enum CPUKind { CK_Tigerlake, CK_SapphireRapids, CK_Alderlake, + CK_Raptorlake, + CK_Meteorlake, CK_KNL, CK_KNM, CK_Lakemont, diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 9f29e9faf385b..dd5d929e615c0 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -856,6 +856,13 @@ def redundant_neg_operands: GICombineRule< [{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; +// Transform (fsub +-0.0, X) -> (fneg X) +def fsub_to_fneg: GICombineRule< + (defs root:$root, register_matchinfo:$matchinfo), + (match (wip_match_opcode G_FSUB):$root, + [{ return Helper.matchFsubToFneg(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyFsubToFneg(*${root}, ${matchinfo}); }])>; + // Transform (fadd x, (fmul y, z)) -> (fma y, z, 
x) // (fadd x, (fmul y, z)) -> (fmad y, z, x) // Transform (fadd (fmul x, y), z) -> (fma x, y, z) @@ -1056,7 +1063,8 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, form_bitfield_extract, constant_fold, fabs_fneg_fold, intdiv_combines, mulh_combines, redundant_neg_operands, and_or_disjoint_mask, fma_combines, fold_binop_into_select, - sub_add_reg, select_to_minmax, redundant_binop_in_equality]>; + sub_add_reg, select_to_minmax, redundant_binop_in_equality, + fsub_to_fneg]>; // A combine group used to for prelegalizer combiners at -O0. The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 69e52581e1d97..61c26dfabed0b 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -250,22 +250,6 @@ struct OffsetAndSize { return OAS.Offset + OAS.Size > Offset && OAS.Offset < Offset + Size; } - OffsetAndSize &operator&=(const OffsetAndSize &R) { - if (Offset == Unassigned) - Offset = R.Offset; - else if (R.Offset != Unassigned && R.Offset != Offset) - Offset = Unknown; - - if (Size == Unassigned) - Size = R.Size; - else if (Size == Unknown || R.Size == Unknown) - Size = Unknown; - else if (R.Size != Unassigned) - Size = std::max(Size, R.Size); - - return *this; - } - /// Constants used to represent special offsets or sizes. /// - This assumes that Offset and Size are non-negative. /// - The constants should not clash with DenseMapInfo, such as EmptyKey @@ -5008,47 +4992,33 @@ struct AAPointerInfo : public AbstractAttribute { /// An access description. 
struct Access { - Access(Instruction *I, int64_t Offset, int64_t Size, - Optional Content, AccessKind Kind, Type *Ty) - : LocalI(I), RemoteI(I), Content(Content), OAS(Offset, Size), - Kind(Kind), Ty(Ty) { + Access(Instruction *I, Optional Content, AccessKind Kind, Type *Ty) + : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) { verify(); } - Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset, - int64_t Size, Optional Content, AccessKind Kind, Type *Ty) - : LocalI(LocalI), RemoteI(RemoteI), Content(Content), OAS(Offset, Size), - Kind(Kind), Ty(Ty) { + Access(Instruction *LocalI, Instruction *RemoteI, Optional Content, + AccessKind Kind, Type *Ty) + : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), + Ty(Ty) { verify(); } Access(const Access &Other) = default; Access(const Access &&Other) : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), - OAS(Other.OAS), Kind(Other.Kind), Ty(Other.Ty) {} + Kind(Other.Kind), Ty(Other.Ty) {} Access &operator=(const Access &Other) = default; bool operator==(const Access &R) const { - return LocalI == R.LocalI && RemoteI == R.RemoteI && OAS == R.OAS && + return LocalI == R.LocalI && RemoteI == R.RemoteI && Content == R.Content && Kind == R.Kind; } bool operator!=(const Access &R) const { return !(*this == R); } Access &operator&=(const Access &R) { assert(RemoteI == R.RemoteI && "Expected same instruction!"); - assert(LocalI == R.LocalI && "Expected same instruction!"); + Content = + AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); Kind = AccessKind(Kind | R.Kind); - auto Before = OAS; - OAS &= R.OAS; - if (Before.isUnassigned() || Before == OAS) { - Content = - AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); - } else { - // Since the OAS information changed, set a conservative state -- drop - // the contents, and assume MayAccess rather than MustAccess. 
- Content.reset(); - Kind = AccessKind(Kind | AK_MAY); - Kind = AccessKind(Kind & ~AK_MUST); - } - verify(); return *this; } @@ -5096,12 +5066,6 @@ struct AAPointerInfo : public AbstractAttribute { /// determined. Optional getContent() const { return Content; } - /// Return the offset for this access. - int64_t getOffset() const { return OAS.Offset; } - - /// Return the size for this access. - int64_t getSize() const { return OAS.Size; } - private: /// The instruction responsible for the access with respect to the local /// scope of the associated attribute. @@ -5114,9 +5078,6 @@ struct AAPointerInfo : public AbstractAttribute { /// cannot be determined. Optional Content; - /// The object accessed, in terms of an offset and size in bytes. - AA::OffsetAndSize OAS; - /// The access kind, e.g., READ, as bitset (could be more than one). AccessKind Kind; @@ -5152,7 +5113,7 @@ struct AAPointerInfo : public AbstractAttribute { virtual bool forallInterferingAccesses( Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref CB, bool &HasBeenWrittenTo, - AA::OffsetAndSize &OAS) const = 0; + AA::OffsetAndSize *OASPtr = nullptr) const = 0; /// This function should return true if the type of the \p AA is AAPointerInfo static bool classof(const AbstractAttribute *AA) { diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index 7558568b0c1e9..41ca345885083 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -36,7 +36,7 @@ struct AddressSanitizerOptions { class AddressSanitizerPass : public PassInfoMixin { public: AddressSanitizerPass(const AddressSanitizerOptions &Options, - bool UseGlobalGC = true, bool UseOdrIndicator = false, + bool UseGlobalGC = true, bool UseOdrIndicator = true, AsanDtorKind DestructorKind = AsanDtorKind::Global); PreservedAnalyses run(Module &M, 
ModuleAnalysisManager &AM); void printPipeline(raw_ostream &OS, diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 2bbc2486fec00..30dab43101dea 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -752,30 +752,10 @@ static bool isIntrinsicCall(const CallBase *Call, Intrinsic::ID IID) { return II && II->getIntrinsicID() == IID; } -static MemoryEffects getMemoryEffectsFromAttrs(AttributeSet Attrs) { - if (Attrs.hasAttribute(Attribute::ReadNone)) - return MemoryEffects::none(); - - ModRefInfo MR = ModRefInfo::ModRef; - if (Attrs.hasAttribute(Attribute::ReadOnly)) - MR = ModRefInfo::Ref; - else if (Attrs.hasAttribute(Attribute::WriteOnly)) - MR = ModRefInfo::Mod; - - if (Attrs.hasAttribute(Attribute::ArgMemOnly)) - return MemoryEffects::argMemOnly(MR); - if (Attrs.hasAttribute(Attribute::InaccessibleMemOnly)) - return MemoryEffects::inaccessibleMemOnly(MR); - if (Attrs.hasAttribute(Attribute::InaccessibleMemOrArgMemOnly)) - return MemoryEffects::inaccessibleOrArgMemOnly(MR); - return MemoryEffects(MR); -} - /// Returns the behavior when calling the given call site. 
MemoryEffects BasicAAResult::getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI) { - MemoryEffects Min = - getMemoryEffectsFromAttrs(Call->getAttributes().getFnAttrs()); + MemoryEffects Min = Call->getAttributes().getMemoryEffects(); if (const Function *F = dyn_cast(Call->getCalledOperand())) { MemoryEffects FuncME = AAQI.AAR.getMemoryEffects(F); @@ -803,7 +783,7 @@ MemoryEffects BasicAAResult::getMemoryEffects(const Function *F) { MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef); } - return getMemoryEffectsFromAttrs(F->getAttributes().getFnAttrs()); + return F->getMemoryEffects(); } ModRefInfo BasicAAResult::getArgModRefInfo(const CallBase *Call, diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index e42512b41aa66..c76155832bce3 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -1111,9 +1111,13 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp, return ConstantInt::get(Tp, APInt::getSignedMinValue(Tp->getIntegerBitWidth())); case RecurKind::FMin: - return ConstantFP::getInfinity(Tp, true); + assert((FMF.noNaNs() && FMF.noSignedZeros()) && + "nnan, nsz is expected to be set for FP min reduction."); + return ConstantFP::getInfinity(Tp, false /*Negative*/); case RecurKind::FMax: - return ConstantFP::getInfinity(Tp, false); + assert((FMF.noNaNs() && FMF.noSignedZeros()) && + "nnan, nsz is expected to be set for FP max reduction."); + return ConstantFP::getInfinity(Tp, true /*Negative*/); case RecurKind::SelectICmp: case RecurKind::SelectFCmp: return getRecurrenceStartValue(); diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index acfac8cb9437c..c8b796a34c6d7 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2198,6 +2198,7 @@ Value *llvm::simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) { return ::simplifyAndInst(Op0, Op1, Q, 
RecursionLimit); } +// TODO: Many of these folds could use LogicalAnd/LogicalOr. static Value *simplifyOrLogic(Value *X, Value *Y) { assert(X->getType() == Y->getType() && "Expected same type for 'or' ops"); Type *Ty = X->getType(); @@ -2262,6 +2263,14 @@ static Value *simplifyOrLogic(Value *X, Value *Y) { m_Value(B))) && match(Y, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))) return NotA; + // The same is true of Logical And + // TODO: This could share the logic of the version above if there was a + // version of LogicalAnd that allowed more than just i1 types. + if (match(X, m_c_LogicalAnd( + m_CombineAnd(m_Value(NotA), m_NotForbidUndef(m_Value(A))), + m_Value(B))) && + match(Y, m_Not(m_c_LogicalOr(m_Specific(A), m_Specific(B))))) + return NotA; // ~(A ^ B) | (A & B) --> ~(A ^ B) // ~(A ^ B) | (B & A) --> ~(A ^ B) @@ -5452,6 +5461,10 @@ simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (match(Op0, m_FNegNSZ(m_Specific(Op1))) || match(Op1, m_FNegNSZ(m_Specific(Op0)))) return ConstantFP::get(Op0->getType(), -1.0); + + // nnan ninf X / [-]0.0 -> poison + if (FMF.noInfs() && match(Op1, m_AnyZeroFP())) + return PoisonValue::get(Op1->getType()); } return nullptr; diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 2fae260e0d8fe..fe08e512a81f7 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -701,7 +701,8 @@ Optional LazyValueInfoImpl::solveBlockValueNonLocal( // to overdefined. if (Result.isOverdefined()) { LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() - << "' - overdefined because of pred (non local).\n"); + << "' - overdefined because of pred '" + << Pred->getName() << "' (non local).\n"); return Result; } } @@ -1859,9 +1860,27 @@ LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned P, Value *LHS, return getPredicateAt(CmpInst::getSwappedPredicate(Pred), RHS, C, CxtI, UseBlockValue); - // Got two non-Constant values. 
While we could handle them somewhat, - // by getting their constant ranges, and applying ConstantRange::icmp(), - // so far it did not appear to be profitable. + // Got two non-Constant values. Try to determine the comparison results based + // on the block values of the two operands, e.g. because they have + // non-overlapping ranges. + if (UseBlockValue) { + Module *M = CxtI->getModule(); + ValueLatticeElement L = + getImpl(PImpl, AC, M).getValueInBlock(LHS, CxtI->getParent(), CxtI); + if (L.isOverdefined()) + return LazyValueInfo::Unknown; + + ValueLatticeElement R = + getImpl(PImpl, AC, M).getValueInBlock(RHS, CxtI->getParent(), CxtI); + Type *Ty = CmpInst::makeCmpResultType(LHS->getType()); + if (Constant *Res = L.getCompare((CmpInst::Predicate)P, Ty, R, + M->getDataLayout())) { + if (Res->isNullValue()) + return LazyValueInfo::False; + if (Res->isOneValue()) + return LazyValueInfo::True; + } + } return LazyValueInfo::Unknown; } diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 9eff2b161185e..bc16c00c53206 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -513,6 +513,43 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr, return ConstantFoldLoadFromConst(C, AccessTy, DL); } + if (auto *MSI = dyn_cast(Inst)) { + // Don't forward from (non-atomic) memset to atomic load. + if (AtLeastAtomic) + return nullptr; + + // Only handle constant memsets. + auto *Val = dyn_cast(MSI->getValue()); + auto *Len = dyn_cast(MSI->getLength()); + if (!Val || !Len) + return nullptr; + + // TODO: Handle offsets. + Value *Dst = MSI->getDest(); + if (!AreEquivalentAddressValues(Dst, Ptr)) + return nullptr; + + if (IsLoadCSE) + *IsLoadCSE = false; + + TypeSize LoadTypeSize = DL.getTypeSizeInBits(AccessTy); + if (LoadTypeSize.isScalable()) + return nullptr; + + // Make sure the read bytes are contained in the memset. 
+ uint64_t LoadSize = LoadTypeSize.getFixedSize(); + if ((Len->getValue() * 8).ult(LoadSize)) + return nullptr; + + APInt Splat = LoadSize >= 8 ? APInt::getSplat(LoadSize, Val->getValue()) + : Val->getValue().trunc(LoadSize); + ConstantInt *SplatC = ConstantInt::get(MSI->getContext(), Splat); + if (CastInst::isBitOrNoopPointerCastable(SplatC->getType(), AccessTy, DL)) + return SplatC; + + return nullptr; + } + return nullptr; } diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp index f55de71ea98ae..a20c05243b773 100644 --- a/llvm/lib/Analysis/MLInlineAdvisor.cpp +++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp @@ -415,8 +415,8 @@ void MLInlineAdvisor::print(raw_ostream &OS) const { << " EdgesOfLastSeenNodes: " << EdgesOfLastSeenNodes << "\n"; OS << "[MLInlineAdvisor] FPI:\n"; for (auto I : FPICache) { - OS << I.getFirst()->getName() << ":\n"; - I.getSecond().print(OS); + OS << I.first->getName() << ":\n"; + I.second.print(OS); OS << "\n"; } OS << "\n"; diff --git a/llvm/lib/Analysis/ValueLattice.cpp b/llvm/lib/Analysis/ValueLattice.cpp index 627166e2409d3..1d2177a92eb46 100644 --- a/llvm/lib/Analysis/ValueLattice.cpp +++ b/llvm/lib/Analysis/ValueLattice.cpp @@ -7,8 +7,51 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueLattice.h" +#include "llvm/Analysis/ConstantFolding.h" namespace llvm { +Constant * +ValueLatticeElement::getCompare(CmpInst::Predicate Pred, Type *Ty, + const ValueLatticeElement &Other, + const DataLayout &DL) const { + // Not yet resolved. + if (isUnknown() || Other.isUnknown()) + return nullptr; + + // TODO: Can be made more precise, but always returning undef would be + // incorrect. 
+ if (isUndef() || Other.isUndef()) + return nullptr; + + if (isConstant() && Other.isConstant()) + return ConstantFoldCompareInstOperands(Pred, getConstant(), + Other.getConstant(), DL); + + if (ICmpInst::isEquality(Pred)) { + // not(C) != C => true, not(C) == C => false. + if ((isNotConstant() && Other.isConstant() && + getNotConstant() == Other.getConstant()) || + (isConstant() && Other.isNotConstant() && + getConstant() == Other.getNotConstant())) + return Pred == ICmpInst::ICMP_NE ? ConstantInt::getTrue(Ty) + : ConstantInt::getFalse(Ty); + } + + // Integer constants are represented as ConstantRanges with single + // elements. + if (!isConstantRange() || !Other.isConstantRange()) + return nullptr; + + const auto &CR = getConstantRange(); + const auto &OtherCR = Other.getConstantRange(); + if (CR.icmp(Pred, OtherCR)) + return ConstantInt::getTrue(Ty); + if (CR.icmp(CmpInst::getInversePredicate(Pred), OtherCR)) + return ConstantInt::getFalse(Ty); + + return nullptr; +} + raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val) { if (Val.isUnknown()) return OS << "unknown"; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 496f1ed435663..2a171df168fca 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -649,6 +649,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(readwrite); KEYWORD(argmem); KEYWORD(inaccessiblemem); + KEYWORD(argmemonly); + KEYWORD(inaccessiblememonly); + KEYWORD(inaccessiblemem_or_argmemonly); KEYWORD(type); KEYWORD(opaque); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 0fda0559b5b41..25204847ca9ce 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1472,6 +1472,31 @@ bool LLParser::parseEnumAttribute(Attribute::AttrKind Attr, AttrBuilder &B, } } +static bool upgradeMemoryAttr(MemoryEffects &ME, lltok::Kind Kind) { + switch (Kind) { + case lltok::kw_readnone: + ME &= MemoryEffects::none(); + 
return true; + case lltok::kw_readonly: + ME &= MemoryEffects::readOnly(); + return true; + case lltok::kw_writeonly: + ME &= MemoryEffects::writeOnly(); + return true; + case lltok::kw_argmemonly: + ME &= MemoryEffects::argMemOnly(); + return true; + case lltok::kw_inaccessiblememonly: + ME &= MemoryEffects::inaccessibleMemOnly(); + return true; + case lltok::kw_inaccessiblemem_or_argmemonly: + ME &= MemoryEffects::inaccessibleOrArgMemOnly(); + return true; + default: + return false; + } +} + /// parseFnAttributeValuePairs /// ::= | '=' bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, @@ -1481,10 +1506,11 @@ bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, B.clear(); + MemoryEffects ME = MemoryEffects::unknown(); while (true) { lltok::Kind Token = Lex.getKind(); if (Token == lltok::rbrace) - return HaveError; // Finished. + break; // Finished. if (Token == lltok::StringConstant) { if (parseStringAttribute(B)) @@ -1512,10 +1538,15 @@ bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, if (Token == lltok::kw_builtin) BuiltinLoc = Loc; + if (upgradeMemoryAttr(ME, Token)) { + Lex.Lex(); + continue; + } + Attribute::AttrKind Attr = tokenToAttribute(Token); if (Attr == Attribute::None) { if (!InAttrGrp) - return HaveError; + break; return error(Lex.getLoc(), "unterminated attribute group"); } @@ -1528,6 +1559,10 @@ bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B, if (!Attribute::canUseAsFnAttr(Attr) && Attr != Attribute::Alignment) HaveError |= error(Loc, "this attribute does not apply to functions"); } + + if (ME != MemoryEffects::unknown()) + B.addMemoryAttr(ME); + return HaveError; } //===----------------------------------------------------------------------===// @@ -4646,6 +4681,24 @@ bool LLParser::parseDILocation(MDNode *&Result, bool IsDistinct) { return false; } +/// parseDIAssignID: +/// ::= distinct !DIAssignID() +bool LLParser::parseDIAssignID(MDNode *&Result, bool IsDistinct) { + if (!IsDistinct) + return Lex.Error("missing 
'distinct', required for !DIAssignID()"); + + Lex.Lex(); + + // Now eat the parens. + if (parseToken(lltok::lparen, "expected '(' here")) + return true; + if (parseToken(lltok::rparen, "expected ')' here")) + return true; + + Result = DIAssignID::getDistinct(Context); + return false; +} + /// parseGenericDINode: /// ::= !GenericDINode(tag: 15, header: "...", operands: {...}) bool LLParser::parseGenericDINode(MDNode *&Result, bool IsDistinct) { diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 338674c086356..66b4edbacde72 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1718,8 +1718,8 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { case Attribute::Convergent: return 1ULL << 46; case Attribute::SafeStack: return 1ULL << 47; case Attribute::NoRecurse: return 1ULL << 48; - case Attribute::InaccessibleMemOnly: return 1ULL << 49; - case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50; + // 1ULL << 49 is InaccessibleMemOnly, which is upgraded separately. + // 1ULL << 50 is InaccessibleMemOrArgMemOnly, which is upgraded separately. case Attribute::SwiftSelf: return 1ULL << 51; case Attribute::SwiftError: return 1ULL << 52; case Attribute::WriteOnly: return 1ULL << 53; @@ -1767,7 +1767,8 @@ static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) { /// been decoded from the given integer. This function must stay in sync with /// 'encodeLLVMAttributesForBitcode'. static void decodeLLVMAttributesForBitcode(AttrBuilder &B, - uint64_t EncodedAttrs) { + uint64_t EncodedAttrs, + uint64_t AttrIdx) { // The alignment is stored as a 16-bit raw value from bits 31--16. We shift // the bits above 31 down by 11 bits. 
unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; @@ -1776,8 +1777,43 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B, if (Alignment) B.addAlignmentAttr(Alignment); - addRawAttributeValue(B, ((EncodedAttrs & (0xfffffULL << 32)) >> 11) | - (EncodedAttrs & 0xffff)); + + uint64_t Attrs = ((EncodedAttrs & (0xfffffULL << 32)) >> 11) | + (EncodedAttrs & 0xffff); + + if (AttrIdx == AttributeList::FunctionIndex) { + // Upgrade old memory attributes. + MemoryEffects ME = MemoryEffects::unknown(); + if (Attrs & (1ULL << 9)) { + // ReadNone + Attrs &= ~(1ULL << 9); + ME &= MemoryEffects::none(); + } + if (Attrs & (1ULL << 10)) { + // ReadOnly + Attrs &= ~(1ULL << 10); + ME &= MemoryEffects::readOnly(); + } + if (Attrs & (1ULL << 49)) { + // InaccessibleMemOnly + Attrs &= ~(1ULL << 49); + ME &= MemoryEffects::inaccessibleMemOnly(); + } + if (Attrs & (1ULL << 50)) { + // InaccessibleMemOrArgMemOnly + Attrs &= ~(1ULL << 50); + ME &= MemoryEffects::inaccessibleOrArgMemOnly(); + } + if (Attrs & (1ULL << 53)) { + // WriteOnly + Attrs &= ~(1ULL << 53); + ME &= MemoryEffects::writeOnly(); + } + if (ME != MemoryEffects::unknown()) + B.addMemoryAttr(ME); + } + + addRawAttributeValue(B, Attrs); } Error BitcodeReader::parseAttributeBlock() { @@ -1824,7 +1860,7 @@ Error BitcodeReader::parseAttributeBlock() { for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B(Context); - decodeLLVMAttributesForBitcode(B, Record[i+1]); + decodeLLVMAttributesForBitcode(B, Record[i+1], Record[i]); Attrs.push_back(AttributeList::get(Context, Record[i], B)); } @@ -1851,8 +1887,6 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; - case bitc::ATTR_KIND_ARGMEMONLY: - return Attribute::ArgMemOnly; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: @@ -1869,10 +1903,6 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) 
{ return Attribute::ElementType; case bitc::ATTR_KIND_FNRETTHUNK_EXTERN: return Attribute::FnRetThunkExtern; - case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: - return Attribute::InaccessibleMemOnly; - case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: - return Attribute::InaccessibleMemOrArgMemOnly; case bitc::ATTR_KIND_INLINE_HINT: return Attribute::InlineHint; case bitc::ATTR_KIND_IN_REG: @@ -2039,6 +2069,31 @@ Error BitcodeReader::parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) { return Error::success(); } +static bool upgradeOldMemoryAttribute(MemoryEffects &ME, uint64_t EncodedKind) { + switch (EncodedKind) { + case bitc::ATTR_KIND_READ_NONE: + ME &= MemoryEffects::none(); + return true; + case bitc::ATTR_KIND_READ_ONLY: + ME &= MemoryEffects::readOnly(); + return true; + case bitc::ATTR_KIND_WRITEONLY: + ME &= MemoryEffects::writeOnly(); + return true; + case bitc::ATTR_KIND_ARGMEMONLY: + ME &= MemoryEffects::argMemOnly(); + return true; + case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: + ME &= MemoryEffects::inaccessibleMemOnly(); + return true; + case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: + ME &= MemoryEffects::inaccessibleOrArgMemOnly(); + return true; + default: + return false; + } +} + Error BitcodeReader::parseAttributeGroupBlock() { if (Error Err = Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) return Err; @@ -2082,10 +2137,16 @@ Error BitcodeReader::parseAttributeGroupBlock() { uint64_t Idx = Record[1]; // Index of the object this attribute refers to. 
AttrBuilder B(Context); + MemoryEffects ME = MemoryEffects::unknown(); for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Record[i] == 0) { // Enum attribute Attribute::AttrKind Kind; - if (Error Err = parseAttrKind(Record[++i], &Kind)) + uint64_t EncodedKind = Record[++i]; + if (Idx == AttributeList::FunctionIndex && + upgradeOldMemoryAttribute(ME, EncodedKind)) + continue; + + if (Error Err = parseAttrKind(EncodedKind, &Kind)) return Err; // Upgrade old-style byval attribute to one with a type, even if it's @@ -2159,6 +2220,9 @@ Error BitcodeReader::parseAttributeGroupBlock() { } } + if (ME != MemoryEffects::unknown()) + B.addMemoryAttr(ME); + UpgradeAttributes(B); MAttributeGroups[GrpID] = AttributeList::get(Context, Idx, B); break; diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp index 02d76f61695af..1ac1502e8aefb 100644 --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -856,6 +856,7 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { case bitc::METADATA_TEMPLATE_VALUE: case bitc::METADATA_GLOBAL_VAR: case bitc::METADATA_LOCAL_VAR: + case bitc::METADATA_ASSIGN_ID: case bitc::METADATA_LABEL: case bitc::METADATA_EXPRESSION: case bitc::METADATA_OBJC_PROPERTY: @@ -1964,6 +1965,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( break; } + case bitc::METADATA_ASSIGN_ID: { + if (Record.size() != 1) + return error("Invalid DIAssignID record."); + + IsDistinct = Record[0] & 1; + if (!IsDistinct) + return error("Invalid DIAssignID record. Must be distinct"); + + MetadataList.assignValue(DIAssignID::getDistinct(Context), NextMetadataNo); + NextMetadataNo++; + break; + } case bitc::METADATA_LOCAL_VAR: { // 10th field is for the obseleted 'inlinedAt:' field. 
if (Record.size() < 8 || Record.size() > 10) diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1ac4413f158eb..4bf881a479170 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -340,6 +340,8 @@ class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { unsigned Abbrev); void writeDIModule(const DIModule *N, SmallVectorImpl &Record, unsigned Abbrev); + void writeDIAssignID(const DIAssignID *N, SmallVectorImpl &Record, + unsigned Abbrev); void writeDITemplateTypeParameter(const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev); @@ -620,8 +622,6 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_ALLOC_SIZE; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; - case Attribute::ArgMemOnly: - return bitc::ATTR_KIND_ARGMEMONLY; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: @@ -640,10 +640,6 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_HOT; case Attribute::ElementType: return bitc::ATTR_KIND_ELEMENTTYPE; - case Attribute::InaccessibleMemOnly: - return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; - case Attribute::InaccessibleMemOrArgMemOnly: - return bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY; case Attribute::InlineHint: return bitc::ATTR_KIND_INLINE_HINT; case Attribute::InReg: @@ -1955,6 +1951,15 @@ void ModuleBitcodeWriter::writeDIModule(const DIModule *N, Record.clear(); } +void ModuleBitcodeWriter::writeDIAssignID(const DIAssignID *N, + SmallVectorImpl &Record, + unsigned Abbrev) { + // There are no arguments for this metadata type. 
+ Record.push_back(N->isDistinct()); + Stream.EmitRecord(bitc::METADATA_ASSIGN_ID, Record, Abbrev); + Record.clear(); +} + void ModuleBitcodeWriter::writeDITemplateTypeParameter( const DITemplateTypeParameter *N, SmallVectorImpl &Record, unsigned Abbrev) { diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 8b46e9580729a..72262b4423fc1 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -610,6 +610,9 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { } case TargetLoweringBase::AtomicExpansionKind::NotAtomic: return lowerAtomicRMWInst(AI); + case TargetLoweringBase::AtomicExpansionKind::Expand: + TLI->emitExpandAtomicRMW(AI); + return true; default: llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); } diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index b9ffb8a8318fb..cf2b32c74eb5a 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -287,7 +287,7 @@ using SetOfInstrs = SmallPtrSet; using TypeIsSExt = PointerIntPair; using InstrToOrigTy = DenseMap; using SExts = SmallVector; -using ValueToSExts = DenseMap; +using ValueToSExts = MapVector; class TypePromotionTransaction; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 1fea2607c061f..a233936ae9dae 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5190,6 +5190,38 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI, return true; } +bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_FSUB); + + Register LHS = MI.getOperand(1).getReg(); + MatchInfo = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + + const auto LHSCst = Ty.isVector() + ? 
getFConstantSplat(LHS, MRI, /* allowUndef */ true) + : getFConstantVRegValWithLookThrough(LHS, MRI); + if (!LHSCst) + return false; + + // -0.0 is always allowed + if (LHSCst->Value.isNegZero()) + return true; + + // +0.0 is only allowed if nsz is set. + if (LHSCst->Value.isPosZero()) + return MI.getFlag(MachineInstr::FmNsz); + + return false; +} + +void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) { + Builder.setInstrAndDebugLoc(MI); + Register Dst = MI.getOperand(0).getReg(); + Builder.buildFNeg( + Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0)); + eraseInst(MI); +} + /// Checks if \p MI is TargetOpcode::G_FMUL and contractable either /// due to global flags or MachineInstr flags. static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) { diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 69fb5bce632e8..7faae09220cc9 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2481,7 +2481,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { ? 
getLLTForMVT(Info.memVT.getSimpleVT()) : LLT::scalar(Info.memVT.getStoreSizeInBits()); MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, MemTy, Alignment)); + Info.flags, MemTy, Alignment, + CI.getAAMetadata())); } return true; diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index a2abe71a6bd7b..35c9aebc119c6 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -62,7 +62,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { /* HashConstantPoolIndices */ true, /* HashMemOperands */ true); assert(Hash && "Expected non-zero Hash"); - return std::to_string(Hash).substr(0, 5); + OS << format_hex_no_prefix(Hash, 16, true); + return OS.str(); } // Gets a hashable artifact from a given MachineOperand (ie an unsigned). @@ -132,7 +133,8 @@ std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { } auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end()); - return std::to_string(HashMI).substr(0, 5); + OS << format_hex_no_prefix(HashMI, 16, true); + return OS.str(); } unsigned VRegRenamer::createVirtualRegister(unsigned VReg) { diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 4884ac9417204..7bbc347a8cf88 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -201,6 +201,18 @@ static cl::opt TriangleChainCount( cl::init(2), cl::Hidden); +// Use case: When block layout is visualized after MBP pass, the basic blocks +// are labeled in layout order; meanwhile blocks could be numbered in a +// different order. It's hard to map between the graph and pass output. +// With this option on, the basic blocks are renumbered in function layout +// order. For debugging only. 
+static cl::opt RenumberBlocksBeforeView( + "renumber-blocks-before-view", + cl::desc( + "If true, basic blocks are re-numbered before MBP layout is printed " + "into a dot graph. Only used when a function is being printed."), + cl::init(false), cl::Hidden); + extern cl::opt EnableExtTspBlockPlacement; extern cl::opt ApplyExtTspWithoutProfile; @@ -3466,6 +3478,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (ViewBlockLayoutWithBFI != GVDT_None && (ViewBlockFreqFuncName.empty() || F->getFunction().getName().equals(ViewBlockFreqFuncName))) { + if (RenumberBlocksBeforeView) + MF.RenumberBlocks(); MBFI->view("MBP." + MF.getName(), false); } diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index 2de5879e26b09..3a8c80cbddf68 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -145,7 +145,7 @@ namespace { DenseMap &OpenChildren); bool PerformCSE(MachineDomTreeNode *Node); - bool isPRECandidate(MachineInstr *MI); + bool isPRECandidate(MachineInstr *MI, SmallSet &PhysRefs); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); /// Heuristics to see if it's profitable to move common computations of MBB @@ -798,7 +798,8 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { // We use stronger checks for PRE candidate rather than for CSE ones to embrace // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps // to exclude instrs created by PRE that won't be CSEed later. 
-bool MachineCSE::isPRECandidate(MachineInstr *MI) { +bool MachineCSE::isPRECandidate(MachineInstr *MI, + SmallSet &PhysRefs) { if (!isCSECandidate(MI) || MI->isNotDuplicable() || MI->mayLoad() || @@ -807,13 +808,14 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) { MI->getNumExplicitDefs() != 1) return false; - for (const auto &def : MI->defs()) - if (!Register::isVirtualRegister(def.getReg())) - return false; - - for (const auto &use : MI->uses()) - if (use.isReg() && !Register::isVirtualRegister(use.getReg())) - return false; + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && !Register::isVirtualRegister(MO.getReg())) { + if (MO.isDef()) + return false; + else + PhysRefs.insert(MO.getReg()); + } + } return true; } @@ -822,7 +824,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, MachineBasicBlock *MBB) { bool Changed = false; for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { - if (!isPRECandidate(&MI)) + SmallSet PhysRefs; + if (!isPRECandidate(&MI, PhysRefs)) continue; if (!PREMap.count(&MI)) { @@ -858,6 +861,15 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, if (MI.isConvergent() && CMBB != MBB) continue; + // If this instruction uses physical registers then we can only do PRE + // if it's using the value that is live at the place we're hoisting to. 
+ bool NonLocal; + PhysDefVector PhysDefs; + if (!PhysRefs.empty() && + !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs, + PhysDefs, NonLocal)) + continue; + assert(MI.getOperand(0).isDef() && "First operand of instr with one explicit def must be this def"); Register VReg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 721bd52448ace..3333cbd109586 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -2277,20 +2277,28 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep, assert(!OffsetSIsScalable && !OffsetDIsScalable && "Expected offsets to be byte offsets"); - if (!BaseOpS->isIdenticalTo(*BaseOpD)) + MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg()); + MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg()); + if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI()) + return true; + + unsigned InitValS = 0; + unsigned LoopValS = 0; + unsigned InitValD = 0; + unsigned LoopValD = 0; + getPhiRegs(*DefS, BB, InitValS, LoopValS); + getPhiRegs(*DefD, BB, InitValD, LoopValD); + MachineInstr *InitDefS = MRI.getVRegDef(InitValS); + MachineInstr *InitDefD = MRI.getVRegDef(InitValD); + + if (!InitDefS->isIdenticalTo(*InitDefD)) return true; // Check that the base register is incremented by a constant value for each // iteration. 
- MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg()); - if (!Def || !Def->isPHI()) - return true; - unsigned InitVal = 0; - unsigned LoopVal = 0; - getPhiRegs(*Def, BB, InitVal, LoopVal); - MachineInstr *LoopDef = MRI.getVRegDef(LoopVal); + MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS); int D = 0; - if (!LoopDef || !TII->getIncrementValue(*LoopDef, D)) + if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D)) return true; uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize(); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 6ef36d86891a1..74e4ad0562b4d 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -61,6 +61,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/ModRef.h" #include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmInfo.h" @@ -1474,10 +1475,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC; unsigned IntrID = IntrIDOp.getIntrinsicID(); if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) { - AttributeList Attrs - = Intrinsic::getAttributes(MF->getFunction().getContext(), - static_cast(IntrID)); - bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone); + AttributeList Attrs = Intrinsic::getAttributes( + MF->getFunction().getContext(), static_cast(IntrID)); + bool DeclHasSideEffects = !Attrs.getMemoryEffects().doesNotAccessMemory(); if (NoSideEffects && DeclHasSideEffects) { report("G_INTRINSIC used with intrinsic that accesses memory", MI); break; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7d139fce7f758..0112a401a8ea6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10380,103 +10380,106 @@ SDValue 
DAGCombiner::foldSelectOfConstants(SDNode *N) { if (!C1 || !C2) return SDValue(); + if (CondVT != MVT::i1 || LegalOperations) { + // fold (select Cond, 0, 1) -> (xor Cond, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. + if (CondVT.isInteger() && + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == + TargetLowering::ZeroOrOneBooleanContent && + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == + TargetLowering::ZeroOrOneBooleanContent && + C1->isZero() && C2->isOne()) { + SDValue NotCond = + DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); + if (VT.bitsEq(CondVT)) + return NotCond; + return DAG.getZExtOrTrunc(NotCond, DL, VT); + } + + return SDValue(); + } + // Only do this before legalization to avoid conflicting with target-specific // transforms in the other direction (create a select from a zext/sext). There // is also a target-independent combine here in DAGCombiner in the other // direction for (select Cond, -1, 0) when the condition is not i1. 
- if (CondVT == MVT::i1 && !LegalOperations) { - // select Cond, 1, 0 --> zext (Cond) - if (C1->isOne() && C2->isZero()) - return DAG.getZExtOrTrunc(Cond, DL, VT); - - // select Cond, -1, 0 --> sext (Cond) - if (C1->isAllOnes() && C2->isZero()) - return DAG.getSExtOrTrunc(Cond, DL, VT); - - // select Cond, 0, 1 --> zext (!Cond) - if (C1->isZero() && C2->isOne()) { - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT); - return NotCond; - } - - // select Cond, 0, -1 --> sext (!Cond) - if (C1->isZero() && C2->isAllOnes()) { - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); - return NotCond; - } - - // Use a target hook because some targets may prefer to transform in the - // other direction. - if (shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) { - // For any constants that differ by 1, we can transform the select into - // an extend and add. - const APInt &C1Val = C1->getAPIntValue(); - const APInt &C2Val = C2->getAPIntValue(); - - // select Cond, C1, C1-1 --> add (zext Cond), C1-1 - if (C1Val - 1 == C2Val) { - Cond = DAG.getZExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } - - // select Cond, C1, C1+1 --> add (sext Cond), C1+1 - if (C1Val + 1 == C2Val) { - Cond = DAG.getSExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } + assert(CondVT == MVT::i1 && !LegalOperations); - // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) - if (C1Val.isPowerOf2() && C2Val.isZero()) { - Cond = DAG.getZExtOrTrunc(Cond, DL, VT); - SDValue ShAmtC = - DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); - return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); - } + // select Cond, 1, 0 --> zext (Cond) + if (C1->isOne() && C2->isZero()) + return DAG.getZExtOrTrunc(Cond, DL, VT); - // select Cond, -1, C --> or (sext Cond), C - if (C1->isAllOnes()) { - Cond = DAG.getSExtOrTrunc(Cond, DL, VT); - return DAG.getNode(ISD::OR, DL, 
VT, Cond, N2); - } + // select Cond, -1, 0 --> sext (Cond) + if (C1->isAllOnes() && C2->isZero()) + return DAG.getSExtOrTrunc(Cond, DL, VT); - // select Cond, C, -1 --> or (sext (not Cond)), C - if (C2->isAllOnes()) { - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); - return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); - } + // select Cond, 0, 1 --> zext (!Cond) + if (C1->isZero() && C2->isOne()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT); + return NotCond; + } - if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) - return V; - } + // select Cond, 0, -1 --> sext (!Cond) + if (C1->isZero() && C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); + return NotCond; + } + // Use a target hook because some targets may prefer to transform in the + // other direction. + if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) return SDValue(); + + // For any constants that differ by 1, we can transform the select into + // an extend and add. + const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (C1Val - 1 == C2Val) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } - // fold (select Cond, 0, 1) -> (xor Cond, 1) - // We can't do this reliably if integer based booleans have different contents - // to floating point based booleans. This is because we can't tell whether we - // have an integer-based boolean or a floating-point-based boolean unless we - // can find the SETCC that produced it and inspect its operands. This is - // fairly easy if C is the SETCC node, but it can potentially be - // undiscoverable (or not reasonably discoverable). For example, it could be - // in another basic block or it could require searching a complicated - // expression. 
- if (CondVT.isInteger() && - TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == - TargetLowering::ZeroOrOneBooleanContent && - TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == - TargetLowering::ZeroOrOneBooleanContent && - C1->isZero() && C2->isOne()) { - SDValue NotCond = - DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); - if (VT.bitsEq(CondVT)) - return NotCond; - return DAG.getZExtOrTrunc(NotCond, DL, VT); + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (C1Val + 1 == C2Val) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isZero()) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = + DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } + + // select Cond, -1, C --> or (sext Cond), C + if (C1->isAllOnes()) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, Cond, N2); + } + + // select Cond, C, -1 --> or (sext (not Cond)), C + if (C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b1e369d21887e..3e59d0d2b753d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -119,10 +119,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } } - if (Personality == EHPersonality::Wasm_CXX) { - WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); - calculateWasmEHInfo(&fn, EHInfo); - } // Initialize the mapping of values to registers. 
This is only set up for // instruction values that are used outside of the block that defines @@ -323,10 +319,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, const auto *BB = CME.Handler.get(); CME.Handler = MBBMap[BB]; } - } - - else if (Personality == EHPersonality::Wasm_CXX) { + } else if (Personality == EHPersonality::Wasm_CXX) { WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + calculateWasmEHInfo(&fn, EHInfo); + // Map all BB references in the Wasm EH data to MBBs. DenseMap SrcToUnwindDest; for (auto &KV : EHInfo.SrcToUnwindDest) { @@ -369,8 +365,7 @@ void FunctionLoweringInfo::clear() { /// CreateReg - Allocate a single virtual register for the given type. Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { - return RegInfo->createVirtualRegister( - MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); + return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT, isDivergent)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -381,8 +376,6 @@ Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { /// will assign registers for each member or element. 
/// Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { - const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); - SmallVector ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 331149bdd05f5..a63e85a2863e5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -398,10 +398,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // If the element type of the source/dest vectors are the same, but the - // parts vector has more elements than the value vector, then we have a - // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the - // elements we want. + // If the parts vector has more elements than the value vector, then we + // have a vector widening case (e.g. <2 x float> -> <4 x float>). + // Extract the elements we want. 
if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) { assert((PartEVT.getVectorElementCount().getKnownMinValue() > ValueVT.getVectorElementCount().getKnownMinValue()) && @@ -415,6 +414,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, DAG.getVectorIdxConstant(0, DL)); if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } // Promoted vector extract diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 11dd2e3dcd0f5..0b760ac652d2e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -321,7 +321,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, /// reference lowered call result static std::pair lowerCallFromStatepointLoweringInfo( SelectionDAGBuilder::StatepointLoweringInfo &SI, - SelectionDAGBuilder &Builder, SmallVectorImpl &PendingExports) { + SelectionDAGBuilder &Builder) { SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerInvokable(SI.CLI, SI.EHPadBB); @@ -770,8 +770,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( // Get call node, we will replace it later with statepoint SDValue ReturnVal; SDNode *CallNode; - std::tie(ReturnVal, CallNode) = - lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports); + std::tie(ReturnVal, CallNode) = lowerCallFromStatepointLoweringInfo(SI, *this); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. 
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 1bc03f0dfd833..cea97a30828ef 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -232,6 +232,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { return FixedVectorType::get(Type::getInt1Ty(Context), 512); case MVT::v1024i1: return FixedVectorType::get(Type::getInt1Ty(Context), 1024); + case MVT::v2048i1: + return FixedVectorType::get(Type::getInt1Ty(Context), 2048); case MVT::v128i2: return FixedVectorType::get(Type::getIntNTy(Context, 2), 128); case MVT::v256i2: diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp index dd5530e07330a..f012bb471be2e 100644 --- a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp +++ b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp @@ -1192,7 +1192,8 @@ void LVScopeCompileUnit::addSize(LVScope *Scope, LVOffset Lower, LVOffset Upper) { LLVM_DEBUG({ dbgs() << format( - "CU [0x%08x], Scope [0x%08x], Range [0x%08x:0x%08x], Size = %d\n", + "CU [0x%08" PRIx64 "], Scope [0x%08" PRIx64 "], Range [0x%08" PRIx64 + ":0x%08" PRIx64 "], Size = %" PRId64 "\n", getOffset(), Scope->getOffset(), Lower, Upper, Upper - Lower); }); @@ -1548,7 +1549,7 @@ void LVScopeCompileUnit::printScopeSize(const LVScope *Scope, raw_ostream &OS) { // implementation-defined rounding inside printing functions. float Percentage = rint((float(Size) / CUContributionSize) * 100.0 * 100.0) / 100.0; - OS << format("%10d (%6.2f%%) : ", Size, Percentage); + OS << format("%10" PRId64 " (%6.2f%%) : ", Size, Percentage); Scope->print(OS); // Keep record of the total sizes at each lexical level. 
diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp index ee5cc5141f74f..f20b5bc677e00 100644 --- a/llvm/lib/Debuginfod/Debuginfod.cpp +++ b/llvm/lib/Debuginfod/Debuginfod.cpp @@ -22,6 +22,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Debuginfod/Debuginfod.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -34,6 +35,7 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/xxhash.h" @@ -169,6 +171,44 @@ Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) { return Error::success(); } +// An over-accepting simplification of the HTTP RFC 7230 spec. +static bool isHeader(StringRef S) { + StringRef Name; + StringRef Value; + std::tie(Name, Value) = S.split(':'); + if (Name.empty() || Value.empty()) + return false; + return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) && + all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; }); +} + +static SmallVector getHeaders() { + const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE"); + if (!Filename) + return {}; + ErrorOr> HeadersFile = + MemoryBuffer::getFile(Filename, /*IsText=*/true); + if (!HeadersFile) + return {}; + + SmallVector Headers; + uint64_t LineNumber = 0; + for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) { + LineNumber++; + if (!isHeader(Line)) { + if (!all_of(Line, llvm::isSpace)) + WithColor::warning() + << "could not parse debuginfod header: " << Filename << ':' + << LineNumber << '\n'; + continue; + } + if (Line.back() == '\r') + Line = Line.drop_back(); + Headers.emplace_back(Line); + } + return Headers; +} + Expected getCachedOrDownloadArtifact( StringRef 
UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, ArrayRef DebuginfodUrls, std::chrono::milliseconds Timeout) { @@ -214,6 +254,7 @@ Expected getCachedOrDownloadArtifact( StreamedHTTPResponseHandler Handler([&]() { return CacheAddStream(Task); }, Client); HTTPRequest Request(ArtifactUrl); + Request.Headers = getHeaders(); Error Err = Client.perform(Request, Handler); if (Err) return std::move(Err); diff --git a/llvm/lib/Debuginfod/HTTPClient.cpp b/llvm/lib/Debuginfod/HTTPClient.cpp index 3376eaa7cd0d2..f9201e4f96268 100644 --- a/llvm/lib/Debuginfod/HTTPClient.cpp +++ b/llvm/lib/Debuginfod/HTTPClient.cpp @@ -111,9 +111,15 @@ Error HTTPClient::perform(const HTTPRequest &Request, curl_easy_setopt(Curl, CURLOPT_URL, Url.c_str()); curl_easy_setopt(Curl, CURLOPT_FOLLOWLOCATION, Request.FollowRedirects); + curl_slist *Headers = nullptr; + for (const std::string &Header : Request.Headers) + Headers = curl_slist_append(Headers, Header.c_str()); + curl_easy_setopt(Curl, CURLOPT_HTTPHEADER, Headers); + CurlHTTPRequest CurlRequest(Handler); curl_easy_setopt(Curl, CURLOPT_WRITEDATA, &CurlRequest); CURLcode CurlRes = curl_easy_perform(Curl); + curl_slist_free_all(Headers); if (CurlRes != CURLE_OK) return joinErrors(std::move(CurlRequest.ErrorState), createStringError(errc::io_error, diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 54ab007323302..1585170144c7c 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -310,9 +310,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)Addr) << " flags: " << *FlagsOrErr << "\n"); - if (!Name.empty()) // Skip absolute symbol relocations. - GlobalSymbolTable[Name] = - SymbolTableEntry(SectionID, Addr, *JITSymFlags); + // Skip absolute symbol relocations. 
+ if (!Name.empty()) { + auto Result = GlobalSymbolTable.insert_or_assign( + Name, SymbolTableEntry(SectionID, Addr, *JITSymFlags)); + processNewSymbol(*I, Result.first->getValue()); + } } else if (SymType == object::SymbolRef::ST_Function || SymType == object::SymbolRef::ST_Data || SymType == object::SymbolRef::ST_Unknown || @@ -344,9 +347,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)SectOffset) << " flags: " << *FlagsOrErr << "\n"); - if (!Name.empty()) // Skip absolute symbol relocations - GlobalSymbolTable[Name] = - SymbolTableEntry(SectionID, SectOffset, *JITSymFlags); + // Skip absolute symbol relocations. + if (!Name.empty()) { + auto Result = GlobalSymbolTable.insert_or_assign( + Name, SymbolTableEntry(SectionID, SectOffset, *JITSymFlags)); + processNewSymbol(*I, Result.first->getValue()); + } } } @@ -632,6 +638,11 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj, RWDataAlign = std::max(RWDataAlign, CommonAlign); } + if (!CodeSectionSizes.empty()) { + // Add 64 bytes for a potential IFunc resolver stub + CodeSectionSizes.push_back(64); + } + // Compute the required allocation space for each different type of sections // (code, read-only data, read-write data) assuming that all sections are // allocated with the max alignment. 
Note that we cannot compute with the diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index b5a64a70a89a4..f343bec642756 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -2292,18 +2292,75 @@ RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(uint64_t GOTOffset, return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset); } +void RuntimeDyldELF::processNewSymbol(const SymbolRef &ObjSymbol, SymbolTableEntry& Symbol) { + // This should never return an error as `processNewSymbol` wouldn't have been + // called if getFlags() returned an error before. + auto ObjSymbolFlags = cantFail(ObjSymbol.getFlags()); + + if (ObjSymbolFlags & SymbolRef::SF_Indirect) { + if (IFuncStubSectionID == 0) { + // Create a dummy section for the ifunc stubs. It will be actually + // allocated in finalizeLoad() below. + IFuncStubSectionID = Sections.size(); + Sections.push_back( + SectionEntry(".text.__llvm_IFuncStubs", nullptr, 0, 0, 0)); + // First 64B are reserverd for the IFunc resolver + IFuncStubOffset = 64; + } + + IFuncStubs.push_back(IFuncStub{IFuncStubOffset, Symbol}); + // Modify the symbol so that it points to the ifunc stub instead of to the + // resolver function. + Symbol = SymbolTableEntry(IFuncStubSectionID, IFuncStubOffset, + Symbol.getFlags()); + IFuncStubOffset += getMaxIFuncStubSize(); + } +} + Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { if (IsMipsO32ABI) if (!PendingRelocs.empty()) return make_error("Can't find matching LO16 reloc"); + // Create the IFunc stubs if necessary. This must be done before processing + // the GOT entries, as the IFunc stubs may create some. 
+ if (IFuncStubSectionID != 0) { + uint8_t *IFuncStubsAddr = MemMgr.allocateCodeSection( + IFuncStubOffset, 1, IFuncStubSectionID, ".text.__llvm_IFuncStubs"); + if (!IFuncStubsAddr) + return make_error( + "Unable to allocate memory for IFunc stubs!"); + Sections[IFuncStubSectionID] = + SectionEntry(".text.__llvm_IFuncStubs", IFuncStubsAddr, IFuncStubOffset, + IFuncStubOffset, 0); + + createIFuncResolver(IFuncStubsAddr); + + LLVM_DEBUG(dbgs() << "Creating IFunc stubs SectionID: " + << IFuncStubSectionID << " Addr: " + << Sections[IFuncStubSectionID].getAddress() << '\n'); + for (auto &IFuncStub : IFuncStubs) { + auto &Symbol = IFuncStub.OriginalSymbol; + LLVM_DEBUG(dbgs() << "\tSectionID: " << Symbol.getSectionID() + << " Offset: " << format("%p", Symbol.getOffset()) + << " IFuncStubOffset: " + << format("%p\n", IFuncStub.StubOffset)); + createIFuncStub(IFuncStubSectionID, 0, IFuncStub.StubOffset, + Symbol.getSectionID(), Symbol.getOffset()); + } + + IFuncStubSectionID = 0; + IFuncStubOffset = 0; + IFuncStubs.clear(); + } + // If necessary, allocate the global offset table if (GOTSectionID != 0) { // Allocate memory for the section size_t TotalSize = CurrentGOTIndex * getGOTEntrySize(); uint8_t *Addr = MemMgr.allocateDataSection(TotalSize, getGOTEntrySize(), - GOTSectionID, ".got", false); + GOTSectionID, ".got", false); if (!Addr) return make_error("Unable to allocate memory for GOT!"); @@ -2326,7 +2383,7 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, section_iterator RelocatedSection = *RelSecOrErr; ObjSectionToIDMap::iterator i = SectionMap.find(*RelocatedSection); - assert (i != SectionMap.end()); + assert(i != SectionMap.end()); SectionToGOTMap[i->second] = GOTSectionID; } } @@ -2362,6 +2419,110 @@ bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const { return Obj.isELF(); } +void RuntimeDyldELF::createIFuncResolver(uint8_t *Addr) const { + if (Arch == Triple::x86_64) { + // The adddres of the GOT1 entry is in %r11, the GOT2 
entry is in %r11+8 + // (see createIFuncStub() for details) + // The following code first saves all registers that contain the original + // function arguments as those registers are not saved by the resolver + // function. %r11 is saved as well so that the GOT2 entry can be updated + // afterwards. Then it calls the actual IFunc resolver function whose + // address is stored in GOT2. After the resolver function returns, all + // saved registers are restored and the return value is written to GOT1. + // Finally, jump to the now resolved function. + // clang-format off + const uint8_t StubCode[] = { + 0x57, // push %rdi + 0x56, // push %rsi + 0x52, // push %rdx + 0x51, // push %rcx + 0x41, 0x50, // push %r8 + 0x41, 0x51, // push %r9 + 0x41, 0x53, // push %r11 + 0x41, 0xff, 0x53, 0x08, // call *0x8(%r11) + 0x41, 0x5b, // pop %r11 + 0x41, 0x59, // pop %r9 + 0x41, 0x58, // pop %r8 + 0x59, // pop %rcx + 0x5a, // pop %rdx + 0x5e, // pop %rsi + 0x5f, // pop %rdi + 0x49, 0x89, 0x03, // mov %rax,(%r11) + 0xff, 0xe0 // jmp *%rax + }; + // clang-format on + static_assert(sizeof(StubCode) <= 64, + "maximum size of the IFunc resolver is 64B"); + memcpy(Addr, StubCode, sizeof(StubCode)); + } else { + report_fatal_error( + "IFunc resolver is not supported for target architecture"); + } +} + +void RuntimeDyldELF::createIFuncStub(unsigned IFuncStubSectionID, + uint64_t IFuncResolverOffset, + uint64_t IFuncStubOffset, + unsigned IFuncSectionID, + uint64_t IFuncOffset) { + auto &IFuncStubSection = Sections[IFuncStubSectionID]; + auto *Addr = IFuncStubSection.getAddressWithOffset(IFuncStubOffset); + + if (Arch == Triple::x86_64) { + // The first instruction loads a PC-relative address into %r11 which is a + // GOT entry for this stub. This initially contains the address to the + // IFunc resolver. We can use %r11 here as it's caller saved but not used + // to pass any arguments. In fact, x86_64 ABI even suggests using %r11 for + // code in the PLT. 
The IFunc resolver will use %r11 to update the GOT + // entry. + // + // The next instruction just jumps to the address contained in the GOT + // entry. As mentioned above, we do this two-step jump by first setting + // %r11 so that the IFunc resolver has access to it. + // + // The IFunc resolver of course also needs to know the actual address of + // the actual IFunc resolver function. This will be stored in a GOT entry + // right next to the first one for this stub. So, the IFunc resolver will + // be able to call it with %r11+8. + // + // In total, two adjacent GOT entries (+relocation) and one additional + // relocation are required: + // GOT1: Address of the IFunc resolver. + // GOT2: Address of the IFunc resolver function. + // IFuncStubOffset+3: 32-bit PC-relative address of GOT1. + uint64_t GOT1 = allocateGOTEntries(2); + uint64_t GOT2 = GOT1 + getGOTEntrySize(); + + RelocationEntry RE1(GOTSectionID, GOT1, ELF::R_X86_64_64, + IFuncResolverOffset, {}); + addRelocationForSection(RE1, IFuncStubSectionID); + RelocationEntry RE2(GOTSectionID, GOT2, ELF::R_X86_64_64, IFuncOffset, {}); + addRelocationForSection(RE2, IFuncSectionID); + + const uint8_t StubCode[] = { + 0x4c, 0x8d, 0x1d, 0x00, 0x00, 0x00, 0x00, // leaq 0x0(%rip),%r11 + 0x41, 0xff, 0x23 // jmpq *(%r11) + }; + assert(sizeof(StubCode) <= getMaxIFuncStubSize() && + "IFunc stub size must not exceed getMaxIFuncStubSize()"); + memcpy(Addr, StubCode, sizeof(StubCode)); + + // The PC-relative value starts 4 bytes from the end of the leaq + // instruction, so the addend is -4. 
+ resolveGOTOffsetRelocation(IFuncStubSectionID, IFuncStubOffset + 3, + GOT1 - 4, ELF::R_X86_64_PC32); + } else { + report_fatal_error("IFunc stub is not supported for target architecture"); + } +} + +unsigned RuntimeDyldELF::getMaxIFuncStubSize() const { + if (Arch == Triple::x86_64) { + return 10; + } + return 0; +} + bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { unsigned RelTy = R.getType(); if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 1251036f4caa8..fbd81e4f63bf4 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -158,6 +158,40 @@ class RuntimeDyldELF : public RuntimeDyldImpl { // Map between GOT relocation value and corresponding GOT offset std::map GOTOffsetMap; + /// The ID of the current IFunc stub section + unsigned IFuncStubSectionID = 0; + /// The current offset into the IFunc stub section + uint64_t IFuncStubOffset = 0; + + /// A IFunc stub and its original symbol + struct IFuncStub { + /// The offset of this stub in the IFunc stub section + uint64_t StubOffset; + /// The symbol table entry of the original symbol + SymbolTableEntry OriginalSymbol; + }; + + /// The IFunc stubs + SmallVector IFuncStubs; + + /// Create the code for the IFunc resolver at the given address. This code + /// works together with the stubs created in createIFuncStub() to call the + /// resolver function and then jump to the real function address. + /// It must not be larger than 64B. + void createIFuncResolver(uint8_t *Addr) const; + /// Create the code for an IFunc stub for the IFunc that is defined in + /// section IFuncSectionID at offset IFuncOffset. The IFunc resolver created + /// by createIFuncResolver() is defined in the section IFuncStubSectionID at + /// offset IFuncResolverOffset. 
The code should be written into the section + /// with the id IFuncStubSectionID at the offset IFuncStubOffset. + void createIFuncStub(unsigned IFuncStubSectionID, + uint64_t IFuncResolverOffset, uint64_t IFuncStubOffset, + unsigned IFuncSectionID, uint64_t IFuncOffset); + /// Return the maximum size of a stub created by createIFuncStub() + unsigned getMaxIFuncStubSize() const; + + void processNewSymbol(const SymbolRef &ObjSymbol, + SymbolTableEntry &Entry) override; bool relocationNeedsGot(const RelocationRef &R) const override; bool relocationNeedsStub(const RelocationRef &R) const override; diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 3940e6ea5b057..0d7ba4d822182 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -435,6 +435,10 @@ class RuntimeDyldImpl { // Return size of Global Offset Table (GOT) entry virtual size_t getGOTEntrySize() { return 0; } + // Hook for the subclasses to do further processing when a symbol is added to + // the global symbol table. This function may modify the symbol table entry. + virtual void processNewSymbol(const SymbolRef &ObjSymbol, SymbolTableEntry& Entry) {} + // Return true if the relocation R may require allocating a GOT entry. 
virtual bool relocationNeedsGot(const RelocationRef &R) const { return false; diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 6a3700e3adb70..0a0e6a9386e58 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4692,17 +4692,248 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } +void OpenMPIRBuilder::createOffloadEntry(bool IsTargetCodegen, Constant *ID, + Constant *Addr, uint64_t Size, + int32_t Flags, + GlobalValue::LinkageTypes) { + if (!IsTargetCodegen) { + emitOffloadingEntry(ID, Addr->getName(), Size, Flags); + return; + } + // TODO: Add support for global variables on the device after declare target + // support. + Function *Fn = dyn_cast(Addr); + if (!Fn) + return; + + Module &M = *(Fn->getParent()); + LLVMContext &Ctx = M.getContext(); + + // Get "nvvm.annotations" metadata node. + NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + + Metadata *MDVals[] = { + ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"), + ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))}; + // Append metadata to nvvm.annotations. + MD->addOperand(MDNode::get(Ctx, MDVals)); + + // Add a function attribute for the kernel. + Fn->addFnAttr(Attribute::get(Ctx, "kernel")); +} + +// We only generate metadata for function that contain target regions. +void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( + OffloadEntriesInfoManager &OffloadEntriesInfoManager, bool IsTargetCodegen, + bool IsEmbedded, bool HasRequiresUnifiedSharedMemory, + EmitMetadataErrorReportFunctionTy &ErrorFn) { + + // If there are no entries, we don't need to do anything. + if (OffloadEntriesInfoManager.empty()) + return; + + LLVMContext &C = M.getContext(); + SmallVector, + 16> + OrderedEntries(OffloadEntriesInfoManager.size()); + + // Auxiliary methods to create metadata values and strings. 
+ auto &&GetMDInt = [this](unsigned V) { + return ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), V)); + }; + + auto &&GetMDString = [&C](StringRef V) { return MDString::get(C, V); }; + + // Create the offloading info metadata node. + NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); + auto &&TargetRegionMetadataEmitter = + [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString]( + const TargetRegionEntryInfo &EntryInfo, + const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &E) { + // Generate metadata for target regions. Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (0). + // - Entry 1 -> Device ID of the file where the entry was identified. + // - Entry 2 -> File ID of the file where the entry was identified. + // - Entry 3 -> Mangled name of the function where the entry was + // identified. + // - Entry 4 -> Line in the file where the entry was identified. + // - Entry 5 -> Count of regions at this DeviceID/FilesID/Line. + // - Entry 6 -> Order the entry was created. + // The first element of the metadata node is the kind. + Metadata *Ops[] = { + GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID), + GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName), + GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count), + GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo); + + // Add metadata to the named metadata node. + MD->addOperand(MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( + TargetRegionMetadataEmitter); + + // Create function that emits metadata for each device global variable entry; + auto &&DeviceGlobalVarMetadataEmitter = + [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD]( + StringRef MangledName, + const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &E) { + // Generate metadata for global variables. 
Each entry of this metadata + // contains: + // - Entry 0 -> Kind of this type of metadata (1). + // - Entry 1 -> Mangled name of the variable. + // - Entry 2 -> Declare target kind. + // - Entry 3 -> Order the entry was created. + // The first element of the metadata node is the kind. + Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName), + GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; + + // Save this entry in the right position of the ordered entries array. + TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0); + OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo); + + // Add metadata to the named metadata node. + MD->addOperand(MDNode::get(C, Ops)); + }; + + OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( + DeviceGlobalVarMetadataEmitter); + + for (const auto &E : OrderedEntries) { + assert(E.first && "All ordered entries must exist!"); + if (const auto *CE = + dyn_cast( + E.first)) { + if (!CE->getID() || !CE->getAddress()) { + // Do not blame the entry if the parent funtion is not emitted. + TargetRegionEntryInfo EntryInfo = E.second; + StringRef FnName = EntryInfo.ParentName; + if (!M.getNamedValue(FnName)) + continue; + ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo); + continue; + } + createOffloadEntry(IsTargetCodegen, CE->getID(), CE->getAddress(), + /*Size=*/0, CE->getFlags(), + GlobalValue::WeakAnyLinkage); + } else if (const auto *CE = dyn_cast< + OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>( + E.first)) { + OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags = + static_cast( + CE->getFlags()); + switch (Flags) { + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo: { + if (IsEmbedded && HasRequiresUnifiedSharedMemory) + continue; + if (!CE->getAddress()) { + ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR, E.second); + continue; + } + // The vaiable has no definition - no need to add the entry. 
+ if (CE->getVarSize() == 0) + continue; + break; + } + case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink: + assert(((IsEmbedded && !CE->getAddress()) || + (!IsEmbedded && CE->getAddress())) && + "Declaret target link address is set."); + if (IsEmbedded) + continue; + if (!CE->getAddress()) { + ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo()); + continue; + } + break; + } + + // Hidden or internal symbols on the device are not externally visible. + // We should not attempt to register them by creating an offloading + // entry. + if (auto *GV = dyn_cast(CE->getAddress())) + if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) + continue; + + createOffloadEntry(IsTargetCodegen, CE->getAddress(), CE->getAddress(), + CE->getVarSize(), Flags, CE->getLinkage()); + + } else { + llvm_unreachable("Unsupported entry kind."); + } + } +} + void TargetRegionEntryInfo::getTargetRegionEntryFnName( SmallVectorImpl &Name, StringRef ParentName, unsigned DeviceID, - unsigned FileID, unsigned Line) { + unsigned FileID, unsigned Line, unsigned Count) { raw_svector_ostream OS(Name); OS << "__omp_offloading" << llvm::format("_%x", DeviceID) << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; + if (Count) + OS << "_" << Count; } -void TargetRegionEntryInfo::getTargetRegionEntryFnName( - SmallVectorImpl &Name) { - getTargetRegionEntryFnName(Name, ParentName, DeviceID, FileID, Line); +void OffloadEntriesInfoManager::getTargetRegionEntryFnName( + SmallVectorImpl &Name, const TargetRegionEntryInfo &EntryInfo) { + unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo); + TargetRegionEntryInfo::getTargetRegionEntryFnName( + Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID, + EntryInfo.Line, NewCount); +} + +/// Loads all the offload entries information from the host IR +/// metadata. 
+void OpenMPIRBuilder::loadOffloadInfoMetadata( + Module &M, OffloadEntriesInfoManager &OffloadEntriesInfoManager) { + // If we are in target mode, load the metadata from the host IR. This code has + // to match the metadata creation in createOffloadEntriesAndInfoMetadata(). + + NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName); + if (!MD) + return; + + for (MDNode *MN : MD->operands()) { + auto &&GetMDInt = [MN](unsigned Idx) { + auto *V = cast(MN->getOperand(Idx)); + return cast(V->getValue())->getZExtValue(); + }; + + auto &&GetMDString = [MN](unsigned Idx) { + auto *V = cast(MN->getOperand(Idx)); + return V->getString(); + }; + + switch (GetMDInt(0)) { + default: + llvm_unreachable("Unexpected metadata!"); + break; + case OffloadEntriesInfoManager::OffloadEntryInfo:: + OffloadingEntryInfoTargetRegion: { + TargetRegionEntryInfo EntryInfo(/*ParentName=*/GetMDString(3), + /*DeviceID=*/GetMDInt(1), + /*FileID=*/GetMDInt(2), + /*Line=*/GetMDInt(4), + /*Count=*/GetMDInt(5)); + OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( + EntryInfo, /*Order=*/GetMDInt(6)); + break; + } + case OffloadEntriesInfoManager::OffloadEntryInfo:: + OffloadingEntryInfoDeviceGlobalVar: + OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( + /*MangledName=*/GetMDString(1), + static_cast( + /*Flags=*/GetMDInt(2)), + /*Order=*/GetMDInt(3)); + break; + } + } } bool OffloadEntriesInfoManager::empty() const { @@ -4710,6 +4941,21 @@ bool OffloadEntriesInfoManager::empty() const { OffloadEntriesDeviceGlobalVar.empty(); } +unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount( + const TargetRegionEntryInfo &EntryInfo) const { + auto It = OffloadEntriesTargetRegionCount.find( + getTargetRegionEntryCountKey(EntryInfo)); + if (It == OffloadEntriesTargetRegionCount.end()) + return 0; + return It->second; +} + +void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount( + const TargetRegionEntryInfo &EntryInfo) { + 
OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] = + EntryInfo.Count + 1; +} + /// Initialize target region entry. void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo( const TargetRegionEntryInfo &EntryInfo, unsigned Order) { @@ -4720,8 +4966,13 @@ void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo( } void OffloadEntriesInfoManager::registerTargetRegionEntryInfo( - const TargetRegionEntryInfo &EntryInfo, Constant *Addr, Constant *ID, + TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags, bool IsDevice) { + assert(EntryInfo.Count == 0 && "expected default EntryInfo"); + + // Update the EntryInfo with the next available count for this location. + EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo); + // If we are emitting code for a target, the entry is already initialized, // only has to be registered. if (IsDevice) { @@ -4743,10 +4994,15 @@ void OffloadEntriesInfoManager::registerTargetRegionEntryInfo( OffloadEntriesTargetRegion[EntryInfo] = Entry; ++OffloadingEntriesNum; } + incrementTargetRegionEntryInfoCount(EntryInfo); } bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo( - const TargetRegionEntryInfo &EntryInfo, bool IgnoreAddressId) const { + TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId) const { + + // Update the EntryInfo with the next available count for this location. 
+ EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo); + auto It = OffloadEntriesTargetRegion.find(EntryInfo); if (It == OffloadEntriesTargetRegion.end()) { return false; diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 21e662bed6b25..d49b8710bc9a4 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -1865,6 +1865,12 @@ static void writeDILocation(raw_ostream &Out, const DILocation *DL, Out << ")"; } +static void writeDIAssignID(raw_ostream &Out, const DIAssignID *DL, + AsmWriterContext &WriterCtx) { + Out << "!DIAssignID()"; + MDFieldPrinter Printer(Out, WriterCtx); +} + static void writeDISubrange(raw_ostream &Out, const DISubrange *N, AsmWriterContext &WriterCtx) { Out << "!DISubrange("; diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index fada07ac383ae..76d7ade09a88c 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -84,7 +84,9 @@ void DIBuilder::finalize() { } if (!AllEnumTypes.empty()) - CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes)); + CUNode->replaceEnumTypes(MDTuple::get( + VMContext, SmallVector(AllEnumTypes.begin(), + AllEnumTypes.end()))); SmallVector RetainValues; // Declarations and definitions of the same type may be retained. Some @@ -556,7 +558,7 @@ DICompositeType *DIBuilder::createEnumerationType( getNonCompileUnitScope(Scope), UnderlyingType, SizeInBits, AlignInBits, 0, IsScoped ? 
DINode::FlagEnumClass : DINode::FlagZero, Elements, 0, nullptr, nullptr, UniqueIdentifier); - AllEnumTypes.push_back(CTy); + AllEnumTypes.emplace_back(CTy); trackIfUnresolved(CTy); return CTy; } diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index be09d14adf0ee..d30fca63067c0 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -39,6 +39,13 @@ using namespace llvm; using namespace llvm::dwarf; +static cl::opt + ExperimentalAssignmentTracking("experimental-assignment-tracking", + cl::init(false)); +bool llvm::getEnableAssignmentTracking() { + return ExperimentalAssignmentTracking; +} + /// Finds all intrinsics declaring local variables as living in the memory that /// 'V' points to. This may include a mix of dbg.declare and /// dbg.addr intrinsics. @@ -462,9 +469,13 @@ bool llvm::stripDebugInfo(Function &F) { if (NewLoopID != LoopID) I.setMetadata(LLVMContext::MD_loop, NewLoopID); } - // Strip heapallocsite attachments, they point into the DIType system. - if (I.hasMetadataOtherThanDebugLoc()) + // Strip other attachments that are or use debug info. + if (I.hasMetadataOtherThanDebugLoc()) { + // Heapallocsites point into the DIType system. I.setMetadata("heapallocsite", nullptr); + // DIAssignID are debug info metadata primitives. + I.setMetadata(LLVMContext::MD_DIAssignID, nullptr); + } } } return Changed; diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 9b4f92a63c5e2..5483595a78667 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1253,6 +1253,13 @@ bool DIExpression::startsWithDeref() const { return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref; } +DIAssignID *DIAssignID::getImpl(LLVMContext &Context, StorageType Storage, + bool ShouldCreate) { + // Uniqued DIAssignID are not supported as the instance address *is* the ID. 
+ assert(Storage != StorageType::Uniqued && "uniqued DIAssignID unsupported"); + return storeImpl(new (0u, Storage) DIAssignID(Context, Storage), Storage); +} + unsigned DIExpression::ExprOperand::getSize() const { uint64_t Op = getOp(); diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 66f7f84b2267c..ab6624ef5f3d7 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -51,6 +51,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ModRef.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/SymbolTableListTraits.h" @@ -727,6 +728,65 @@ void Function::copyAttributesFrom(const Function *Src) { setPrologueData(Src->getPrologueData()); } +MemoryEffects Function::getMemoryEffects() const { + return getAttributes().getMemoryEffects(); +} +void Function::setMemoryEffects(MemoryEffects ME) { + addFnAttr(Attribute::getWithMemoryEffects(getContext(), ME)); +} + +/// Determine if the function does not access memory. +bool Function::doesNotAccessMemory() const { + return getMemoryEffects().doesNotAccessMemory(); +} +void Function::setDoesNotAccessMemory() { + setMemoryEffects(MemoryEffects::none()); +} + +/// Determine if the function does not access or only reads memory. +bool Function::onlyReadsMemory() const { + return getMemoryEffects().onlyReadsMemory(); +} +void Function::setOnlyReadsMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::readOnly()); +} + +/// Determine if the function does not access or only writes memory. +bool Function::onlyWritesMemory() const { + return getMemoryEffects().onlyWritesMemory(); +} +void Function::setOnlyWritesMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::writeOnly()); +} + +/// Determine if the call can access memmory only using pointers based +/// on its arguments. 
+bool Function::onlyAccessesArgMemory() const { + return getMemoryEffects().onlyAccessesArgPointees(); +} +void Function::setOnlyAccessesArgMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::argMemOnly()); +} + +/// Determine if the function may only access memory that is +/// inaccessible from the IR. +bool Function::onlyAccessesInaccessibleMemory() const { + return getMemoryEffects().onlyAccessesInaccessibleMem(); +} +void Function::setOnlyAccessesInaccessibleMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::inaccessibleMemOnly()); +} + +/// Determine if the function may only access memory that is +/// either inaccessible from the IR or pointed to by its arguments. +bool Function::onlyAccessesInaccessibleMemOrArgMem() const { + return getMemoryEffects().onlyAccessesInaccessibleOrArgMem(); +} +void Function::setOnlyAccessesInaccessibleMemOrArgMem() { + setMemoryEffects(getMemoryEffects() & + MemoryEffects::inaccessibleOrArgMemOnly()); +} + /// Table of string intrinsic names indexed by enum value. static const char * const IntrinsicNameTable[] = { "not_intrinsic", diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 8682a938d78c4..05e225dd64cfa 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ModRef.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -391,10 +392,12 @@ bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const { template Attribute CallBase::getFnAttrOnCalledFunction(AK Kind) const { - // Operand bundles override attributes on the called function, but don't - // override attributes directly present on the call instruction. 
- if (isFnAttrDisallowedByOpBundle(Kind)) - return Attribute(); + if constexpr (std::is_same_v) { + // getMemoryEffects() correctly combines memory effects from the call-site, + // operand bundles and function. + assert(Kind != Attribute::Memory && "Use getMemoryEffects() instead"); + } + Value *V = getCalledOperand(); if (auto *CE = dyn_cast(V)) if (CE->getOpcode() == BitCast) @@ -534,6 +537,77 @@ bool CallBase::hasClobberingOperandBundles() const { getIntrinsicID() != Intrinsic::assume; } +MemoryEffects CallBase::getMemoryEffects() const { + MemoryEffects ME = getAttributes().getMemoryEffects(); + if (auto *Fn = dyn_cast(getCalledOperand())) { + MemoryEffects FnME = Fn->getMemoryEffects(); + if (hasOperandBundles()) { + // TODO: Add a method to get memory effects for operand bundles instead. + if (hasReadingOperandBundles()) + FnME |= MemoryEffects::readOnly(); + if (hasClobberingOperandBundles()) + FnME |= MemoryEffects::writeOnly(); + } + ME &= FnME; + } + return ME; +} +void CallBase::setMemoryEffects(MemoryEffects ME) { + addFnAttr(Attribute::getWithMemoryEffects(getContext(), ME)); +} + +/// Determine if the function does not access memory. +bool CallBase::doesNotAccessMemory() const { + return getMemoryEffects().doesNotAccessMemory(); +} +void CallBase::setDoesNotAccessMemory() { + setMemoryEffects(MemoryEffects::none()); +} + +/// Determine if the function does not access or only reads memory. +bool CallBase::onlyReadsMemory() const { + return getMemoryEffects().onlyReadsMemory(); +} +void CallBase::setOnlyReadsMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::readOnly()); +} + +/// Determine if the function does not access or only writes memory. 
+bool CallBase::onlyWritesMemory() const { + return getMemoryEffects().onlyWritesMemory(); +} +void CallBase::setOnlyWritesMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::writeOnly()); +} + +/// Determine if the call can access memmory only using pointers based +/// on its arguments. +bool CallBase::onlyAccessesArgMemory() const { + return getMemoryEffects().onlyAccessesArgPointees(); +} +void CallBase::setOnlyAccessesArgMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::argMemOnly()); +} + +/// Determine if the function may only access memory that is +/// inaccessible from the IR. +bool CallBase::onlyAccessesInaccessibleMemory() const { + return getMemoryEffects().onlyAccessesInaccessibleMem(); +} +void CallBase::setOnlyAccessesInaccessibleMemory() { + setMemoryEffects(getMemoryEffects() & MemoryEffects::inaccessibleMemOnly()); +} + +/// Determine if the function may only access memory that is +/// either inaccessible from the IR or pointed to by its arguments. +bool CallBase::onlyAccessesInaccessibleMemOrArgMem() const { + return getMemoryEffects().onlyAccessesInaccessibleOrArgMem(); +} +void CallBase::setOnlyAccessesInaccessibleMemOrArgMem() { + setMemoryEffects(getMemoryEffects() & + MemoryEffects::inaccessibleOrArgMemOnly()); +} + //===----------------------------------------------------------------------===// // CallInst Implementation //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 8ca75f58e4033..b6537b2077ebe 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -112,10 +112,23 @@ static ValueAsMetadata *getAsMetadata(Value *V) { void DbgVariableIntrinsic::replaceVariableLocationOp(Value *OldValue, Value *NewValue) { + // If OldValue is used as the address part of a dbg.assign intrinsic replace + // it with NewValue and return true. 
+ auto ReplaceDbgAssignAddress = [this, OldValue, NewValue]() -> bool { + auto *DAI = dyn_cast(this); + if (!DAI || OldValue != DAI->getAddress()) + return false; + DAI->setAddress(NewValue); + return true; + }; + bool DbgAssignAddrReplaced = ReplaceDbgAssignAddress(); + (void)DbgAssignAddrReplaced; + assert(NewValue && "Values must be non-null"); auto Locations = location_ops(); auto OldIt = find(Locations, OldValue); - assert(OldIt != Locations.end() && "OldValue must be a current location"); + assert((OldIt != Locations.end() || DbgAssignAddrReplaced) && + "OldValue must be a current location"); if (!hasArgList()) { Value *NewOperand = isa(NewValue) ? NewValue @@ -172,6 +185,32 @@ Optional DbgVariableIntrinsic::getFragmentSizeInBits() const { return getVariable()->getSizeInBits(); } +Value *DbgAssignIntrinsic::getAddress() const { + auto *MD = getRawAddress(); + if (auto *V = dyn_cast(MD)) + return V->getValue(); + + // When the value goes to null, it gets replaced by an empty MDNode. 
+ assert(!cast(MD)->getNumOperands() && "Expected an empty MDNode"); + return nullptr; +} + +void DbgAssignIntrinsic::setAssignId(DIAssignID *New) { + setOperand(OpAssignID, MetadataAsValue::get(getContext(), New)); +} + +void DbgAssignIntrinsic::setAddress(Value *V) { + assert(V->getType()->isPointerTy() && + "Destination Component must be a pointer type"); + setOperand(OpAddress, + MetadataAsValue::get(getContext(), ValueAsMetadata::get(V))); +} + +void DbgAssignIntrinsic::setValue(Value *V) { + setOperand(OpValue, + MetadataAsValue::get(getContext(), ValueAsMetadata::get(V))); +} + int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef NameTable, StringRef Name) { assert(Name.startswith("llvm.")); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 0614f206981a1..ab6730c578a7b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -471,6 +471,7 @@ class Verifier : public InstVisitor, VerifierSupport { void visitCallStackMetadata(MDNode *MD); void visitMemProfMetadata(Instruction &I, MDNode *MD); void visitCallsiteMetadata(Instruction &I, MDNode *MD); + void visitDIAssignIDMetadata(Instruction &I, MDNode *MD); void visitAnnotationMetadata(MDNode *Annotation); void visitAliasScopeMetadata(const MDNode *MD); void visitAliasScopeListMetadata(const MDNode *MD); @@ -815,9 +816,18 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) { void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, const GlobalAlias &GA, const Constant &C) { - if (const auto *GV = dyn_cast(&C)) { - Check(!GV->isDeclarationForLinker(), "Alias must point to a definition", + if (GA.hasAvailableExternallyLinkage()) { + Check(isa(C) && + cast(C).hasAvailableExternallyLinkage(), + "available_externally alias must point to available_externally " + "global value", &GA); + } + if (const auto *GV = dyn_cast(&C)) { + if (!GA.hasAvailableExternallyLinkage()) { + Check(!GV->isDeclarationForLinker(), "Alias must point to a definition", + 
&GA); + } if (const auto *GA2 = dyn_cast(GV)) { Check(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA); @@ -846,7 +856,7 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, void Verifier::visitGlobalAlias(const GlobalAlias &GA) { Check(GlobalAlias::isValidLinkage(GA.getLinkage()), "Alias should have private, internal, linkonce, weak, linkonce_odr, " - "weak_odr, or external linkage!", + "weak_odr, external, or available_externally linkage!", &GA); const Constant *Aliasee = GA.getAliasee(); Check(Aliasee, "Aliasee cannot be NULL!", &GA); @@ -1483,6 +1493,11 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) { CheckDI(!isa(Ty), "invalid type", &N, N.getType()); } +void Verifier::visitDIAssignID(const DIAssignID &N) { + CheckDI(!N.getNumOperands(), "DIAssignID has no arguments", &N); + CheckDI(N.isDistinct(), "DIAssignID must be distinct", &N); +} + void Verifier::visitDILabel(const DILabel &N) { if (auto *S = N.getRawScope()) CheckDI(isa(S), "invalid scope", &N, S); @@ -2021,28 +2036,6 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, "' does not apply to functions!", V); - Check(!(Attrs.hasFnAttr(Attribute::ReadNone) && - Attrs.hasFnAttr(Attribute::ReadOnly)), - "Attributes 'readnone and readonly' are incompatible!", V); - - Check(!(Attrs.hasFnAttr(Attribute::ReadNone) && - Attrs.hasFnAttr(Attribute::WriteOnly)), - "Attributes 'readnone and writeonly' are incompatible!", V); - - Check(!(Attrs.hasFnAttr(Attribute::ReadOnly) && - Attrs.hasFnAttr(Attribute::WriteOnly)), - "Attributes 'readonly and writeonly' are incompatible!", V); - - Check(!(Attrs.hasFnAttr(Attribute::ReadNone) && - Attrs.hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly)), - "Attributes 'readnone and inaccessiblemem_or_argmemonly' are " - "incompatible!", - V); - - Check(!(Attrs.hasFnAttr(Attribute::ReadNone) && - Attrs.hasFnAttr(Attribute::InaccessibleMemOnly)), - "Attributes 'readnone and inaccessiblememonly' are incompatible!", 
V); - Check(!(Attrs.hasFnAttr(Attribute::NoInline) && Attrs.hasFnAttr(Attribute::AlwaysInline)), "Attributes 'noinline and alwaysinline' are incompatible!", V); @@ -2879,6 +2872,8 @@ void Verifier::visitSwitchInst(SwitchInst &SI) { Type *SwitchTy = SI.getCondition()->getType(); SmallPtrSet Constants; for (auto &Case : SI.cases()) { + Check(isa(SI.getOperand(Case.getCaseIndex() * 2 + 2)), + "Case value is not a constant integer.", &SI); Check(Case.getCaseValue()->getType() == SwitchTy, "Switch constants must all be same type as switch value!", &SI); Check(Constants.insert(Case.getCaseValue()).second, @@ -4549,6 +4544,23 @@ void Verifier::visitProfMetadata(Instruction &I, MDNode *MD) { } } +void Verifier::visitDIAssignIDMetadata(Instruction &I, MDNode *MD) { + assert(I.hasMetadata(LLVMContext::MD_DIAssignID)); + bool ExpectedInstTy = + isa(I) || isa(I) || isa(I); + CheckDI(ExpectedInstTy, "!DIAssignID attached to unexpected instruction kind", + I, MD); + // Iterate over the MetadataAsValue uses of the DIAssignID - these should + // only be found as DbgAssignIntrinsic operands. + if (auto *AsValue = MetadataAsValue::getIfExists(Context, MD)) { + for (auto *User : AsValue->users()) { + CheckDI(isa(User), + "!DIAssignID should only be used by llvm.dbg.assign intrinsics", + MD, User); + } + } +} + void Verifier::visitCallStackMetadata(MDNode *MD) { // Call stack metadata should consist of a list of at least 1 constant int // (representing a hash of the location). 
@@ -4850,6 +4862,9 @@ void Verifier::visitInstruction(Instruction &I) { if (MDNode *MD = I.getMetadata(LLVMContext::MD_callsite)) visitCallsiteMetadata(I, MD); + if (MDNode *MD = I.getMetadata(LLVMContext::MD_DIAssignID)) + visitDIAssignIDMetadata(I, MD); + if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation)) visitAnnotationMetadata(Annotation); @@ -5026,6 +5041,9 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::dbg_value: // llvm.dbg.value visitDbgIntrinsic("value", cast(Call)); break; + case Intrinsic::dbg_assign: // llvm.dbg.assign + visitDbgIntrinsic("assign", cast(Call)); + break; case Intrinsic::dbg_label: // llvm.dbg.label visitDbgLabelIntrinsic("label", cast(Call)); break; @@ -5989,6 +6007,18 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgVariableIntrinsic &DII) { "invalid llvm.dbg." + Kind + " intrinsic expression", &DII, DII.getRawExpression()); + if (auto *DAI = dyn_cast(&DII)) { + CheckDI(isa(DAI->getRawAssignID()), + "invalid llvm.dbg.assign intrinsic DIAssignID", &DII, + DAI->getRawAssignID()); + CheckDI(isa(DAI->getRawAddress()), + "invalid llvm.dbg.assign intrinsic address)", &DII, + DAI->getRawAddress()); + CheckDI(isa(DAI->getRawAddressExpression()), + "invalid llvm.dbg.assign intrinsic address expression", &DII, + DAI->getRawAddressExpression()); + } + // Ignore broken !dbg attachments; they're checked elsewhere. if (MDNode *N = DII.getDebugLoc().getAsMDNode()) if (!isa(N)) diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 286d3ca3e2cc0..dc28b681a1515 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -712,11 +712,11 @@ handleNonPrevailingComdat(GlobalValue &GV, if (!NonPrevailingComdats.count(C)) return; - // Additionally need to drop externally visible global values from the comdat - // to available_externally, so that there aren't multiply defined linker - // errors. 
- if (!GV.hasLocalLinkage()) - GV.setLinkage(GlobalValue::AvailableExternallyLinkage); + // Additionally need to drop all global values from the comdat to + // available_externally, to satisfy the COMDAT requirement that all members + // are discarded as a unit. The non-local linkage global values avoid + // duplicate definition linker errors. + GV.setLinkage(GlobalValue::AvailableExternallyLinkage); if (auto GO = dyn_cast(&GV)) GO->setComdat(nullptr); diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp index 8f62d692d050a..d8cdcdc21d39c 100644 --- a/llvm/lib/Object/OffloadBinary.cpp +++ b/llvm/lib/Object/OffloadBinary.cpp @@ -17,6 +17,7 @@ #include "llvm/Object/Archive.h" #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" @@ -41,6 +42,10 @@ Error extractOffloadFiles(MemoryBufferRef Contents, std::unique_ptr Buffer = MemoryBuffer::getMemBuffer(Contents.getBuffer().drop_front(Offset), "", /*RequiresNullTerminator*/ false); + if (!isAddrAligned(Align(OffloadBinary::getAlignment()), + Buffer->getBufferStart())) + Buffer = MemoryBuffer::getMemBufferCopy(Buffer->getBuffer(), + Buffer->getBufferIdentifier()); auto BinaryOrErr = OffloadBinary::create(*Buffer); if (!BinaryOrErr) return BinaryOrErr.takeError(); @@ -62,12 +67,26 @@ Error extractOffloadFiles(MemoryBufferRef Contents, } // Extract offloading binaries from an Object file \p Obj. -Error extractFromBinary(const ObjectFile &Obj, +Error extractFromObject(const ObjectFile &Obj, SmallVectorImpl &Binaries) { - for (ELFSectionRef Sec : Obj.sections()) { - if (Sec.getType() != ELF::SHT_LLVM_OFFLOADING) + assert((Obj.isELF() || Obj.isCOFF()) && "Invalid file type"); + + for (SectionRef Sec : Obj.sections()) { + // ELF files contain a section with the LLVM_OFFLOADING type. 
+ if (Obj.isELF() && + static_cast(Sec).getType() != ELF::SHT_LLVM_OFFLOADING) continue; + // COFF has no section types so we rely on the name of the section. + if (Obj.isCOFF()) { + Expected NameOrErr = Sec.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + + if (!NameOrErr->equals(".llvm.offloading")) + continue; + } + Expected Buffer = Sec.getContents(); if (!Buffer) return Buffer.takeError(); @@ -254,12 +273,15 @@ Error object::extractOffloadBinaries(MemoryBufferRef Buffer, switch (Type) { case file_magic::bitcode: return extractFromBitcode(Buffer, Binaries); - case file_magic::elf_relocatable: { + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::coff_object: { Expected> ObjFile = ObjectFile::createObjectFile(Buffer, Type); if (!ObjFile) return ObjFile.takeError(); - return extractFromBinary(*ObjFile->get(), Binaries); + return extractFromObject(*ObjFile->get(), Binaries); } case file_magic::archive: { Expected> LibFile = diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index df32ce6ef6307..7ec8411f597ff 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -200,6 +200,7 @@ PipelineTuningOptions::PipelineTuningOptions() { LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; CallGraphProfile = true; MergeFunctions = EnableMergeFunctions; + InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; } @@ -741,7 +742,11 @@ static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { ModuleInlinerWrapperPass PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase) { - InlineParams IP = getInlineParamsFromOptLevel(Level); + InlineParams IP; + if (PTO.InlinerThreshold == -1) + IP = getInlineParamsFromOptLevel(Level); + else + IP = getInlineParams(PTO.InlinerThreshold); // For PreLinkThinLTO + SamplePGO, set 
hot-caller threshold to 0 to // disable hot callsite inline (as much as possible [1]) because it makes // profile annotation in the backend inaccurate. diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index cd4e8900c9637..de632695ca499 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -530,13 +530,10 @@ Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) { for (uint32_t S = 0; S < NS; S++) { uint32_t ND = Func.getNumValueDataForSite(VK, S); std::unique_ptr VD = Func.getValueForSite(VK, S); - bool WasZero = false; + DenseSet SeenValues; for (uint32_t I = 0; I < ND; I++) - if ((VK != IPVK_IndirectCallTarget) && (VD[I].Value == 0)) { - if (WasZero) - return make_error(instrprof_error::invalid_prof); - WasZero = true; - } + if ((VK != IPVK_IndirectCallTarget) && !SeenValues.insert(VD[I].Value).second) + return make_error(instrprof_error::invalid_prof); } } diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 01ab58e942f9a..c63ff43427162 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -59,9 +59,7 @@ elseif( CMAKE_HOST_UNIX ) if( LLVM_ENABLE_TERMINFO ) set(imported_libs ${imported_libs} Terminfo::terminfo) endif() - if( LLVM_ENABLE_THREADS AND (HAVE_LIBATOMIC OR HAVE_CXX_LIBATOMICS64) ) - set(system_libs ${system_libs} atomic) - endif() + set(system_libs ${system_libs} ${LLVM_ATOMIC_LIB}) set(system_libs ${system_libs} ${LLVM_PTHREAD_LIB}) if( UNIX AND NOT (BEOS OR HAIKU) ) set(system_libs ${system_libs} m) @@ -172,6 +170,7 @@ add_llvm_component_library(LLVMSupport FileUtilities.cpp FileOutputBuffer.cpp FoldingSet.cpp + Format.cpp FormattedStream.cpp FormatVariadic.cpp GlobPattern.cpp diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index 136b813b1f6c8..fbaacbbbcf8a0 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ 
-1158,9 +1158,11 @@ Error ExpansionContext::expandResponseFile( assert(sys::path::is_absolute(FName)); llvm::ErrorOr> MemBufOrErr = FS->getBufferForFile(FName); - if (!MemBufOrErr) - return llvm::createStringError( - MemBufOrErr.getError(), Twine("cannot not open file '") + FName + "'"); + if (!MemBufOrErr) { + std::error_code EC = MemBufOrErr.getError(); + return llvm::createStringError(EC, Twine("cannot not open file '") + FName + + "': " + EC.message()); + } MemoryBuffer &MemBuf = *MemBufOrErr.get(); StringRef Str(MemBuf.getBufferStart(), MemBuf.getBufferSize()); @@ -1262,7 +1264,7 @@ Error ExpansionContext::expandResponseFiles( if (auto CWD = FS->getCurrentWorkingDirectory()) { CurrDir = *CWD; } else { - return make_error( + return createStringError( CWD.getError(), Twine("cannot get absolute path for: ") + FName); } } else { @@ -1271,49 +1273,48 @@ Error ExpansionContext::expandResponseFiles( llvm::sys::path::append(CurrDir, FName); FName = CurrDir.c_str(); } + + ErrorOr Res = FS->status(FName); + if (!Res || !Res->exists()) { + std::error_code EC = Res.getError(); + if (!InConfigFile) { + // If the specified file does not exist, leave '@file' unexpanded, as + // libiberty does. + if (!EC || EC == llvm::errc::no_such_file_or_directory) { + ++I; + continue; + } + } + if (!EC) + EC = llvm::errc::no_such_file_or_directory; + return createStringError(EC, Twine("cannot not open file '") + FName + + "': " + EC.message()); + } + const llvm::vfs::Status &FileStatus = Res.get(); + auto IsEquivalent = - [FName, this](const ResponseFileRecord &RFile) -> ErrorOr { - ErrorOr LHS = FS->status(FName); - if (!LHS) - return LHS.getError(); + [FileStatus, this](const ResponseFileRecord &RFile) -> ErrorOr { ErrorOr RHS = FS->status(RFile.File); if (!RHS) return RHS.getError(); - return LHS->equivalent(*RHS); + return FileStatus.equivalent(*RHS); }; // Check for recursive response files. 
for (const auto &F : drop_begin(FileStack)) { if (ErrorOr R = IsEquivalent(F)) { if (R.get()) - return make_error( - Twine("recursive expansion of: '") + F.File + "'", R.getError()); + return createStringError( + R.getError(), Twine("recursive expansion of: '") + F.File + "'"); } else { - return make_error(Twine("cannot open file: ") + F.File, - R.getError()); + return createStringError(R.getError(), + Twine("cannot open file: ") + F.File); } } // Replace this response file argument with the tokenization of its // contents. Nested response files are expanded in subsequent iterations. SmallVector ExpandedArgv; - if (!InConfigFile) { - // If the specified file does not exist, leave '@file' unexpanded, as - // libiberty does. - ErrorOr Res = FS->status(FName); - if (!Res) { - std::error_code EC = Res.getError(); - if (EC == llvm::errc::no_such_file_or_directory) { - ++I; - continue; - } - } else { - if (!Res->exists()) { - ++I; - continue; - } - } - } if (Error Err = expandResponseFile(FName, ExpandedArgv)) return Err; diff --git a/llvm/lib/Support/Format.cpp b/llvm/lib/Support/Format.cpp new file mode 100644 index 0000000000000..45b279915afac --- /dev/null +++ b/llvm/lib/Support/Format.cpp @@ -0,0 +1,370 @@ +//===- Format.cpp - Efficient printf-style formatting for streams -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the non-template part of Format.h, which is used to +// provide a type-safe-ish interface to printf-style formatting. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Format.h" + +namespace { +/// Enum representation of a printf-style length specifier. 
+enum ArgLength : char { + /// Corresponds to 'hh' length specifier. + AL_ShortShort, + /// Corresponds to 'h' length specifier. + AL_Short, + /// Corresponds to default length specifier. + AL_Default, + /// Corresponds to 'l' length specifier. + AL_Long, + /// Corresponds to 'll' length specifier. + AL_LongLong, + /// Corresponds to 'j' length specifier. + AL_IntMax, + /// Corresponds to 'z' length specifier. + AL_Size, + /// Corresponds to 't' length specifier. + AL_Ptrdiff, + /// Corresponds to 'L' length specifier. + AL_LongDouble, + /// First invalid value of \p ArgLength. + AL_End, +}; + +/// Enum representation of a printf-style specifier. +enum SpecifierChar : char { + /// Corresponds to any of 'd', 'i', 'u', 'o', 'x' or 'X' specifiers. + SC_Int, + /// Corresponds to any of 'f', 'F', 'e', 'E', 'g', 'G', 'a' or 'A' specifiers. + SC_Float, + /// Corresponds to 'c' specifier. + SC_Char, + /// Corresponds to 's' specifier. + SC_String, + /// Corresponds to 'p' specifier. + SC_VoidPointer, + /// Corresponds to 'n' specifier. + SC_Count, + /// First invalid value of \p SpecifierChar. + SC_End, +}; + +constexpr uint64_t specifierBit(char C) { + // specifierMask builds a bit map where each set bit indicates that the + // character whose ASCII value is 64 + (bit position) would be legal to use + // as a format specifier in the current parsing context. + // To cover all ASCII characters, we would need 128 bits; however, the only + // character with an ASCII value less than 64 that can be used as a specifier + // is % (as in %%), so we save some space and complexity by dropping the + // lower half of the bit map, which is going to be all zeroes anyway. + // % is handled as a special case.
+ return (uint64_t)1 << (C - 64); +} + +template +constexpr /* consteval */ uint64_t specifierMask(const char (&Specifiers)[N]) { + uint64_t Mask = 0; + for (const char *I = std::begin(Specifiers); I != std::end(Specifiers); ++I) { + if (*I == 0) + break; + Mask |= specifierBit(*I); + } + return Mask; +} + +constexpr auto ST_Unknown = llvm::PrintfStyleFormatReader::ST_Unknown; +constexpr auto ST_WideChar = llvm::PrintfStyleFormatReader::ST_WideChar; +constexpr auto ST_Int = llvm::PrintfStyleFormatReader::ST_Int; +constexpr auto ST_Long = llvm::PrintfStyleFormatReader::ST_Long; +constexpr auto ST_LongLong = llvm::PrintfStyleFormatReader::ST_LongLong; +constexpr auto ST_IntMax = llvm::PrintfStyleFormatReader::ST_IntMax; +constexpr auto ST_Size = llvm::PrintfStyleFormatReader::ST_Size; +constexpr auto ST_Ptrdiff = llvm::PrintfStyleFormatReader::ST_Ptrdiff; +constexpr auto ST_Double = llvm::PrintfStyleFormatReader::ST_Double; +constexpr auto ST_LongDouble = llvm::PrintfStyleFormatReader::ST_LongDouble; +constexpr auto ST_CString = llvm::PrintfStyleFormatReader::ST_CString; +constexpr auto ST_WideCString = llvm::PrintfStyleFormatReader::ST_WideCString; +constexpr auto ST_VoidPointer = llvm::PrintfStyleFormatReader::ST_VoidPointer; +constexpr auto ST_Count_Char = llvm::PrintfStyleFormatReader::ST_Count_Char; +constexpr auto ST_Count_Short = llvm::PrintfStyleFormatReader::ST_Count_Short; +constexpr auto ST_Count_Int = llvm::PrintfStyleFormatReader::ST_Count_Int; +constexpr auto ST_Count_Long = llvm::PrintfStyleFormatReader::ST_Count_Long; +constexpr auto ST_Count_LongLong = + llvm::PrintfStyleFormatReader::ST_Count_LongLong; +constexpr auto ST_Count_IntMax = llvm::PrintfStyleFormatReader::ST_Count_IntMax; +constexpr auto ST_Count_Size = llvm::PrintfStyleFormatReader::ST_Count_Size; +constexpr auto ST_Count_Ptrdiff = + llvm::PrintfStyleFormatReader::ST_Count_Ptrdiff; + +llvm::PrintfStyleFormatReader::SpecifierType SpecifierTable[SC_End][AL_End] = { + { + // SC_Int + 
ST_Int, + ST_Int, + ST_Int, + ST_Long, + ST_LongLong, + ST_IntMax, + ST_Size, + ST_Ptrdiff, + ST_Unknown, + }, + { + // SC_Float + ST_Unknown, + ST_Unknown, + ST_Double, + ST_Double, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_LongDouble, + }, + { + // SC_Char + ST_Unknown, + ST_Unknown, + ST_Int, + ST_WideChar, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + }, + { + // SC_String + ST_Unknown, + ST_Unknown, + ST_CString, + ST_WideCString, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + }, + { + // SC_VoidPointer + ST_Unknown, + ST_Unknown, + ST_VoidPointer, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + ST_Unknown, + }, + { + // SC_Count + ST_Count_Char, + ST_Count_Short, + ST_Count_Int, + ST_Count_Long, + ST_Count_LongLong, + ST_Count_IntMax, + ST_Count_Size, + ST_Count_Ptrdiff, + ST_Unknown, + }, +}; +} // namespace + +namespace llvm { + +void PrintfStyleFormatReader::refillSpecifierQueue() { + if (auto PercentPtr = strchr(Fmt, '%')) { + Fmt = PercentPtr; + } else { + SpecifierQueue.push_back(ST_EndOfFormatString); + return; + } + + if (*++Fmt == '%') { + // %% case: skip and try again + ++Fmt; + refillSpecifierQueue(); + return; + } + + // Push ST_Unknown to SpecifierQueue. If we bail out early, this is what + // the caller gets. Fill in real specifiers to Specifiers: if we + // successfully get to the end, then swap Specifiers with SpecifierQueue. + SpecifierQueue.push_back(ST_Unknown); + llvm::SmallVector Specifiers; + + // Bitfield keeping track of which specifier characters are allowed given + // flags and precision settings. Each bit tells whether ascii character + // 0x40 + (bit position) is allowed as a specifier. '%', which has an ASCII value + // less than 0x40 and does not allow any customization, is handled by a check + // above. The starting value contains all standard specifiers.
+ uint64_t ValidSpecifiers = specifierMask("diuoxXfFeEgGaAcspn"); + + // update specifier mask based on flags + bool ReadAllFlags = false; + while (!ReadAllFlags) { + switch (*Fmt) { + case '+': + case '-': + case ' ': + // valid for all specifiers + ++Fmt; + break; + case '#': + ValidSpecifiers &= specifierMask("xXaAeEfFgG"); + ++Fmt; + break; + case '0': + ValidSpecifiers &= specifierMask("diouxXaAeEfFgG"); + ++Fmt; + break; + default: + ReadAllFlags = true; + break; + } + } + + // skip width + if (*Fmt == '*') { + Specifiers.push_back(ST_Int); + ++Fmt; + } else + while (*Fmt >= '0' && *Fmt <= '9') + ++Fmt; + + // test precision + if (*Fmt == '.') { + ValidSpecifiers &= specifierMask("diouxXaAeEfFgGs"); + ++Fmt; + if (*Fmt == '*') { + Specifiers.push_back(ST_Int); + ++Fmt; + } else + while (*Fmt >= '0' && *Fmt <= '9') + ++Fmt; + } + + // parse length + bool FoundLength = false; + ArgLength AL = AL_Default; + while (!FoundLength) { + ArgLength NewAL; + switch (*Fmt) { + case 'h': + NewAL = AL_Short; + break; + case 'l': + NewAL = AL_Long; + break; + case 'j': + NewAL = AL_IntMax; + break; + case 'z': + NewAL = AL_Size; + break; + case 't': + NewAL = AL_Ptrdiff; + break; + case 'L': + NewAL = AL_LongDouble; + break; + default: + FoundLength = true; + continue; + } + + if (NewAL == AL_Long && AL == AL_Long) + AL = AL_LongLong; + else if (NewAL == AL_Short && AL == AL_Short) + AL = AL_ShortShort; + else if (AL == AL_Default) + AL = NewAL; + else + return; + ++Fmt; + } + + // parse specifier; verify that the character is a valid specifier given + // restrictions imposed by the use of flags and precision values + char Next = *Fmt; + if (Next == 0) + return; + + ++Fmt; + if (Next < 0x40 || (specifierBit(Next) & ValidSpecifiers) == 0) + return; + + SpecifierChar SC; + switch (Next) { + case 'd': + case 'i': + case 'u': + case 'o': + case 'x': + case 'X': + SC = SC_Int; + break; + + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': +
case 'G': + SC = SC_Float; + break; + + case 'c': + SC = SC_Char; + break; + + case 's': + SC = SC_String; + break; + + case 'p': + SC = SC_VoidPointer; + break; + + case 'n': + SC = SC_Count; + break; + + default: + return; + } + + auto Spec = SpecifierTable[SC][AL]; + if (Spec == ST_Unknown) + return; + + Specifiers.push_back(Spec); + std::reverse(Specifiers.begin(), Specifiers.end()); + std::swap(Specifiers, SpecifierQueue); +} + +const char *PrintfStyleFormatReader::ensureCompatible(const char *Expected, + const char *Fmt) { + PrintfStyleFormatReader EFR(Expected); + PrintfStyleFormatReader FFR(Fmt); + SpecifierType EST; + do { + EST = EFR.nextSpecifier(); + if (EST != FFR.nextSpecifier()) + return Expected; + } while (EST); + return Fmt; +} + +} // namespace llvm diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 70cae1e221b2b..732aa83090439 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -213,6 +213,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0xd0b", "cortex-a76") .Case("0xd0d", "cortex-a77") .Case("0xd41", "cortex-a78") + .Case("0xd4d", "cortex-a715") .Case("0xd44", "cortex-x1") .Case("0xd4c", "cortex-x1c") .Case("0xd0c", "neoverse-n1") @@ -815,6 +816,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // Alderlake: case 0x97: case 0x9a: + // Raptorlake: + case 0xb7: + // Meteorlake: + case 0xb5: + case 0xaa: + case 0xac: CPU = "alderlake"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_ALDERLAKE; @@ -1244,7 +1251,7 @@ StringRef sys::getHostCPUName() { return "generic"; } -#elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) +#elif defined(__APPLE__) && defined(__powerpc__) StringRef sys::getHostCPUName() { host_basic_info_data_t hostInfo; mach_msg_type_number_t infoCount; @@ -1288,7 +1295,7 @@ StringRef sys::getHostCPUName() { return "generic"; } -#elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) 
+#elif defined(__linux__) && defined(__powerpc__) StringRef sys::getHostCPUName() { std::unique_ptr P = getProcCpuinfoContent(); StringRef Content = P ? P->getBuffer() : ""; diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc index 5e008069dd989..e4454fe8c517d 100644 --- a/llvm/lib/Support/Unix/Memory.inc +++ b/llvm/lib/Support/Unix/Memory.inc @@ -50,8 +50,7 @@ static int getPosixProtectionFlags(unsigned Flags) { llvm::sys::Memory::MF_EXEC: return PROT_READ | PROT_WRITE | PROT_EXEC; case llvm::sys::Memory::MF_EXEC: -#if (defined(__FreeBSD__) || defined(__POWERPC__) || defined (__ppc__) || \ - defined(_POWER) || defined(_ARCH_PPC)) +#if defined(__FreeBSD__) || defined(__powerpc__) // On PowerPC, having an executable page that has no read permission // can have unintended consequences. The function InvalidateInstruction- // Cache uses instructions dcbf and icbi, both of which are treated by @@ -213,9 +212,7 @@ void Memory::InvalidateInstructionCache(const void *Addr, // icache invalidation for PPC and ARM. 
#if defined(__APPLE__) -# if (defined(__POWERPC__) || defined (__ppc__) || \ - defined(_POWER) || defined(_ARCH_PPC) || defined(__arm__) || \ - defined(__arm64__)) +# if (defined(__powerpc__) || defined(__arm__) || defined(__arm64__)) sys_icache_invalidate(const_cast(Addr), Len); # endif @@ -226,8 +223,7 @@ void Memory::InvalidateInstructionCache(const void *Addr, #else -# if (defined(__POWERPC__) || defined (__ppc__) || \ - defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__) +# if defined(__powerpc__) && defined(__GNUC__) const size_t LineSize = 32; const intptr_t Mask = ~(LineSize - 1); diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index 20bcfb3b9094a..7fcbb2108eea5 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -370,6 +370,10 @@ constexpr ProcInfo Processors[] = { { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids }, // Alderlake microarchitecture based processors. { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake }, + // Raptorlake microarchitecture based processors. + { {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake }, + // Meteorlake microarchitecture based processors. + { {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake }, // Knights Landing processor. { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL }, // Knights Mill processor. diff --git a/llvm/lib/TableGen/SetTheory.cpp b/llvm/lib/TableGen/SetTheory.cpp index 3db46aae6d967..34fdd35269164 100644 --- a/llvm/lib/TableGen/SetTheory.cpp +++ b/llvm/lib/TableGen/SetTheory.cpp @@ -210,21 +210,26 @@ struct SequenceOp : public SetTheory::Operator { PrintFatalError(Loc, "To out of range"); RecordKeeper &Records = - cast(Expr->getOperator())->getDef()->getRecords(); + cast(Expr->getOperator())->getDef()->getRecords(); Step *= From <= To ? 
1 : -1; + const char FallbackFmt[] = "%u"; while (true) { if (Step > 0 && From > To) break; else if (Step < 0 && From < To) break; + const char *const VerifiedFmt = PrintfStyleFormatReader::ensureCompatible( + FallbackFmt, Format.c_str()); + if (VerifiedFmt == FallbackFmt) + PrintFatalError(Loc, "Format string '" + Format + + "' is incompatible with '%u'!"); std::string Name; - raw_string_ostream OS(Name); - OS << format(Format.c_str(), unsigned(From)); - Record *Rec = Records.getDef(OS.str()); + raw_string_ostream(Name) << format(VerifiedFmt, unsigned(From)); + Record *Rec = Records.getDef(Name); if (!Rec) - PrintFatalError(Loc, "No def named '" + Name + "': " + - Expr->getAsString()); + PrintFatalError(Loc, + "No def named '" + Name + "': " + Expr->getAsString()); // Try to reevaluate Rec in case it is a set. if (const RecVec *Result = ST.expand(Rec)) Elts.insert(Result->begin(), Result->end()); diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 03759f5911a3a..f28f460ea4e72 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -165,6 +165,9 @@ def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "tru def FeatureSVE2p1: SubtargetFeature<"sve2p1", "HasSVE2p1", "true", "Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>; +def FeatureB16B16 : SubtargetFeature<"b16b16", "HasB16B16", "true", + "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", []>; + def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -479,6 +482,9 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true", "Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>; +def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true", + "Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) 
instructions", [FeatureSME2]>; + def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", "Apple A7 (the CPU formerly known as Cyclone)">; @@ -640,7 +646,7 @@ class AArch64Unsupported { list F; } def SVEUnsupported : AArch64Unsupported { let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, - HasSVE2BitPerm, HasSVEorSME, HasSVE2orSME]; + HasSVE2BitPerm, HasSVEorSME, HasSVE2p1, HasSVE2orSME, HasSVE2p1_or_HasSME2p1]; } def PAUnsupported : AArch64Unsupported { @@ -648,7 +654,7 @@ def PAUnsupported : AArch64Unsupported { } def SMEUnsupported : AArch64Unsupported { - let F = [HasSME, HasSMEF64F64, HasSMEI16I64, HasSME2, HasSVE2p1_or_HasSME2]; + let F = [HasSME, HasSMEF64F64, HasSMEI16I64, HasSME2, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1]; } include "AArch64SchedA53.td" @@ -764,6 +770,14 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", FeatureLSLFast, FeaturePostRAScheduler]>; +def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", + "Cortex-A715 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion, + FeatureLSLFast, + FeatureFuseAdrpAdd]>; + def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", "CortexR82", "Cortex-R82 ARM processors", [ @@ -1093,6 +1107,10 @@ def ProcessorFeatures { list A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, FeatureETE, FeatureMTE, FeatureFP16FML, FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; + list A715 = [HasV9_0aOps, FeatureNEON, FeatureMTE, + FeatureFP16FML, FeatureSVE, FeatureTRBE, + FeatureSVE2BitPerm, FeatureBF16, FeatureETE, + FeaturePerfMon, FeatureMatMulInt8, FeatureSPE]; list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, FeatureFP16FML, FeatureSSBS, FeaturePredRes, FeatureSB]; @@ -1231,6 +1249,8 @@ def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C, [TuneA78C]>; def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, [TuneA710]>; +def : 
ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715, + [TuneA715]>; def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, [TuneR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 844f9c0c7159a..de44144dc25bc 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -2803,6 +2803,122 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { return true; } +static bool isWorthFoldingIntoOrrWithLeftShift(SDValue Dst, + SelectionDAG *CurDAG, + SDValue &LeftShiftedOperand, + uint64_t &LeftShiftAmount) { + // Avoid folding Dst into ORR-with-left-shift if Dst has other uses than ORR. + if (!Dst.hasOneUse()) + return false; + + EVT VT = Dst.getValueType(); + assert((VT == MVT::i32 || VT == MVT::i64) && + "Caller should guarantee that VT is one of i32 or i64"); + const unsigned SizeInBits = VT.getSizeInBits(); + + SDLoc DL(Dst.getNode()); + uint64_t AndImm, ShlImm; + if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) && + isShiftedMask_64(AndImm)) { + // Avoid transforming 'DstOp0' if it has other uses than the AND node. + SDValue DstOp0 = Dst.getOperand(0); + if (!DstOp0.hasOneUse()) + return false; + + // An example to illustrate the transformation + // From: + // lsr x8, x1, #1 + // and x8, x8, #0x3f80 + // bfxil x8, x1, #0, #7 + // To: + // and x8, x23, #0x7f + // ubfx x9, x23, #8, #7 + // orr x23, x8, x9, lsl #7 + // + // The number of instructions remains the same, but ORR is faster than BFXIL + // on many AArch64 processors (or as good as BFXIL if not faster). Besides, + // the dependency chain is improved after the transformation. 
+ uint64_t SrlImm; + if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) { + uint64_t NumTrailingZeroInShiftedMask = countTrailingZeros(AndImm); + if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) { + unsigned MaskWidth = + countTrailingOnes(AndImm >> NumTrailingZeroInShiftedMask); + unsigned UBFMOpc = + (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; + SDNode *UBFMNode = CurDAG->getMachineNode( + UBFMOpc, DL, VT, DstOp0.getOperand(0), + CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL, + VT), + CurDAG->getTargetConstant( + SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT)); + LeftShiftedOperand = SDValue(UBFMNode, 0); + LeftShiftAmount = NumTrailingZeroInShiftedMask; + return true; + } + } + } else if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) { + LeftShiftedOperand = Dst.getOperand(0); + LeftShiftAmount = ShlImm; + return true; + } + // FIXME: Extend the implementation to optimize if Dst is an SRL node. + return false; +} + +static bool tryOrrWithLeftShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, + SDValue Src, SDValue Dst, SelectionDAG *CurDAG, + const bool BiggerPattern) { + EVT VT = N->getValueType(0); + assert((VT == MVT::i32 || VT == MVT::i64) && + "Expect result type to be i32 or i64 since N is combinable to BFM"); + SDLoc DL(N); + + // Bail out if BFM simplifies away one node in BFM Dst. + if (OrOpd1 != Dst) + return false; + + // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer + // nodes from Rn (or inserts additional shift node) if BiggerPattern is true. + if (BiggerPattern) { + uint64_t SrcAndImm; + if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) && + isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) { + // OrOpd0 = AND Src, #Mask + // So BFM simplifies away one AND node from Src and doesn't simplify away + // nodes from Dst. 
If ORR with left-shifted operand also simplifies away + // one node (from Rd), ORR is better since it has higher throughput and + // smaller latency than BFM on many AArch64 processors (and for the rest + // ORR is at least as good as BFM). + SDValue LeftShiftedOperand; + uint64_t LeftShiftAmount; + if (isWorthFoldingIntoOrrWithLeftShift(Dst, CurDAG, LeftShiftedOperand, + LeftShiftAmount)) { + unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; + SDValue Ops[] = {OrOpd0, LeftShiftedOperand, + CurDAG->getTargetConstant(LeftShiftAmount, DL, VT)}; + CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); + return true; + } + } + return false; + } + + assert((!BiggerPattern) && "BiggerPattern should be handled above"); + + uint64_t ShlImm; + // FIXME: Extend the implementation if OrOpd0 is an SRL node. + if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm) && + OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) { + unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs; + SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ShlImm, DL, VT)}; + CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops); + return true; + } + + return false; +} + static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG) { assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); @@ -2905,6 +3021,14 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, // or is useful because it discards more bits Dst = OrOpd1Val; + // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR + // with left-shifted operand is more efficient. + // FIXME: Extend this to compare AArch64::BFM and AArch64::ORR with + // right-shifted operand as well. 
+ if (tryOrrWithLeftShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG, + BiggerPattern)) + return true; + // both parts match SDLoc DL(N); SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 17c489b2fb5ad..fce26dd6c21f5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14454,7 +14454,7 @@ AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N, if (auto *SRLC = dyn_cast(AndLHS.getOperand(1))) { if (N->getOpcode() == ISD::SHL) if (auto *SHLC = dyn_cast(N->getOperand(1))) - return SRLC->getAPIntValue() == SHLC->getAPIntValue(); + return SRLC->getZExtValue() == SHLC->getZExtValue(); return false; } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 19ff399d5ed4a..b7c1cb6a47831 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1408,6 +1408,18 @@ def sme_elm_idx0_15 : Operand, TImmLeaf; + +def uimm3s8 : Operand, ImmLeaf= 0 && Imm <= 56 && ((Imm % 8) == 0); }], UImmS8XForm> { + let PrintMethod = "printVectorIndex<8>"; + let ParserMatchClass = UImm3s8Operand; +} + class UImmScaledMemoryIndexedRange : AsmOperandClass { let Name = "UImm" # Width # "s" # Scale # "Range"; let DiagnosticType = "InvalidMemoryIndexedRange" # Scale # "UImm" # Width; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 137e8ac917f04..b3db70967f14e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -138,6 +138,8 @@ def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, AssemblerPredicateWithAll<(all_of 
FeatureSVE2BitPerm), "sve2-bitperm">; +def HasB16B16 : Predicate<"Subtarget->hasB16B16()">, + AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">; def HasSME : Predicate<"Subtarget->hasSME()">, AssemblerPredicateWithAll<(all_of FeatureSME), "sme">; def HasSMEF64F64 : Predicate<"Subtarget->hasSMEF64F64()">, @@ -146,6 +148,8 @@ def HasSMEI16I64 : Predicate<"Subtarget->hasSMEI16I64()">, AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">; def HasSME2 : Predicate<"Subtarget->hasSME2()">, AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">; +def HasSME2p1 : Predicate<"Subtarget->hasSME2p1()">, + AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">; // A subset of SVE(2) instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. def HasSVEorSME @@ -158,10 +162,13 @@ def HasSVE2orSME "sve2 or sme">; def HasSVE2p1_or_HasSME : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">, - AssemblerPredicate<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; + AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; def HasSVE2p1_or_HasSME2 : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">, - AssemblerPredicate<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">; + AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">; +def HasSVE2p1_or_HasSME2p1 + : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">, + AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">; // A subset of NEON instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. 
def HasNEONorSME diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 8a41cd5c4d49b..7b100e9026d06 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -1233,6 +1233,10 @@ let EncoderMethod = "EncodeRegAsMultipleOf<2>", def ZZ_d_mul_r : RegisterOperand"> { let ParserMatchClass = ZPRVectorListMul<64, 2>; } + + def ZZ_q_mul_r : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListMul<128, 2>; + } } // end let EncoderMethod/DecoderMethod let EncoderMethod = "EncodeRegAsMultipleOf<4>", @@ -1252,6 +1256,10 @@ let EncoderMethod = "EncodeRegAsMultipleOf<4>", def ZZZZ_d_mul_r : RegisterOperand"> { let ParserMatchClass = ZPRVectorListMul<64, 4>; } + + def ZZZZ_q_mul_r : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListMul<128, 4>; + } } // end let EncoderMethod/DecoderMethod class ZPRExtendAsmOperand; } +def ZT0 : AArch64Reg<0, "zt0">; + // SME Register Classes let isAllocatable = 0 in { @@ -1416,6 +1426,10 @@ let isAllocatable = 0 in { } } +def ZTR : RegisterClass<"AArch64", [untyped], 512, (add ZT0)> { + let Size = 512; + let DiagnosticType = "InvalidLookupTable"; +} // SME Register Operands // There are three types of SME matrix register operands: // * Tiles: diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 5a8149cf38500..23d034636c4fc 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -351,14 +351,14 @@ defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11>; defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11>; defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11>; -def FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00, 0b00>; -def FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b01, 0b00>; -def BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b10, 0b00>; 
-def BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b11, 0b00>; - -def SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b00, 0b11>; -def UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b01, 0b11>; -def SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b10, 0b11>; +defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000>; +defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001>; +defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000>; +defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001>; + +defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110>; +defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111>; +defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110>; defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000>; defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001>; defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100>; @@ -366,14 +366,14 @@ defm SQCVTN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtn", 0b010>; defm SQCVTUN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtun", 0b110>; defm UQCVTN_Z4Z : sme2_int_cvt_vg4_single<"uqcvtn", 0b011>; -def FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b010>; -def FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b010>; -def FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b011>; -def FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b011>; -def SCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"scvtf", 0b100>; -def SCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"scvtf", 0b100>; -def UCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"ucvtf", 0b101>; -def UCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"ucvtf", 0b101>; +defm FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 0b00010>; +defm FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b0001000>; +defm FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b00011>; +defm FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b0001010>; +defm SCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"scvtf", 0b00100>; +defm SCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"scvtf", 0b0010000>; +defm 
UCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"ucvtf", 0b00101>; +defm UCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"ucvtf", 0b0010010>; defm SMAX_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"smax", 0b0000000>; defm SMAX_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"smax", 0b0000000>; @@ -559,6 +559,43 @@ defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001>; defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100>; defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101>; + +def ZERO_T : sme2_zero_zt<"zero", 0b0001>; + +def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>; +def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>; + +def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>; +def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>; + +defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2">; +defm LUTI2_2ZTZI : sme2_luti2_vector_vg2_index<"luti2">; +defm LUTI2_4ZTZI : sme2_luti2_vector_vg4_index<"luti2">; + +defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4">; +defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">; +defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">; + +defm SUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"sunpk", 0b0>; +defm SUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"sunpk", 0b0>; +defm UUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"uunpk", 0b1>; +defm UUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"uunpk", 0b1>; + +defm ZIP_VG2_2ZZZ : sme2_zip_vector_vg2<"zip", 0b0>; +defm UZP_VG2_2ZZZ : sme2_zip_vector_vg2<"uzp", 0b1>; +defm ZIP_VG4_4Z4Z : sme2_zip_vector_vg4<"zip", 0b0110000>; +defm UZP_VG4_4Z4Z : sme2_zip_vector_vg4<"uzp", 0b0110001>; +defm ZIP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"zip", 0b0111000>; +defm UZP_VG4_4Z4Z_Q : sme2_zip_vector_vg4_Q<"uzp", 0b0111001>; + +defm FRINTA_2Z2Z: sme2_frint_vector_vg2_multi<"frinta", 0b11000>; +defm FRINTA_4Z4Z: sme2_frint_vector_vg4_multi<"frinta", 0b1100000>; +defm FRINTM_2Z2Z: sme2_frint_vector_vg2_multi<"frintm", 0b10100>; +defm FRINTM_4Z4Z: sme2_frint_vector_vg4_multi<"frintm", 0b1010000>; +defm 
FRINTN_2Z2Z: sme2_frint_vector_vg2_multi<"frintn", 0b10000>; +defm FRINTN_4Z4Z: sme2_frint_vector_vg4_multi<"frintn", 0b1000000>; +defm FRINTP_2Z2Z: sme2_frint_vector_vg2_multi<"frintp", 0b10010>; +defm FRINTP_4Z4Z: sme2_frint_vector_vg4_multi<"frintp", 0b1001000>; } let Predicates = [HasSME2, HasSMEI16I64] in { diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 1549295a72bc4..32bdf17a4c3e3 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -689,8 +689,8 @@ let Predicates = [HasSVE] in { } // End HasSVE let Predicates = [HasSVEorSME] in { - defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>; - defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>; + defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b00, "fmla", int_aarch64_sve_fmla_lane>; + defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b01, "fmls", int_aarch64_sve_fmls_lane>; defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>; defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>; @@ -904,10 +904,16 @@ let Predicates = [HasSVEorSME] in { defm LD1SH_S_IMM : sve_mem_cld_si<0b1001, "ld1sh", Z_s, ZPR32>; defm LD1W_IMM : sve_mem_cld_si<0b1010, "ld1w", Z_s, ZPR32>; defm LD1W_D_IMM : sve_mem_cld_si<0b1011, "ld1w", Z_d, ZPR64>; + let Predicates = [HasSVE2p1] in { + defm LD1W_Q_IMM : sve_mem_128b_cld_si<0b10, "ld1w">; + } defm LD1SB_D_IMM : sve_mem_cld_si<0b1100, "ld1sb", Z_d, ZPR64>; defm LD1SB_S_IMM : sve_mem_cld_si<0b1101, "ld1sb", Z_s, ZPR32>; defm LD1SB_H_IMM : sve_mem_cld_si<0b1110, "ld1sb", Z_h, ZPR16>; defm LD1D_IMM : sve_mem_cld_si<0b1111, "ld1d", Z_d, ZPR64>; + let Predicates = [HasSVE2p1] in { + defm LD1D_Q_IMM : sve_mem_128b_cld_si<0b11, "ld1d">; + } // LD1R loads (splat scalar to vector) defm LD1RB_IMM : sve_mem_ld_dup<0b00, 0b00, "ld1rb", Z_b, ZPR8, uimm6s1>; @@ -965,10 
+971,16 @@ let Predicates = [HasSVEorSME] in { defm LD1SH_S : sve_mem_cld_ss<0b1001, "ld1sh", Z_s, ZPR32, GPR64NoXZRshifted16>; defm LD1W : sve_mem_cld_ss<0b1010, "ld1w", Z_s, ZPR32, GPR64NoXZRshifted32>; defm LD1W_D : sve_mem_cld_ss<0b1011, "ld1w", Z_d, ZPR64, GPR64NoXZRshifted32>; + let Predicates = [HasSVE2p1] in { + defm LD1W_Q : sve_mem_128b_cld_ss<0b10, "ld1w", GPR64NoXZRshifted32>; + } defm LD1SB_D : sve_mem_cld_ss<0b1100, "ld1sb", Z_d, ZPR64, GPR64NoXZRshifted8>; defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>; defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>; defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + let Predicates = [HasSVE2p1] in { + defm LD1D_Q : sve_mem_128b_cld_ss<0b11, "ld1d", GPR64NoXZRshifted64>; + } } // End HasSVEorSME let Predicates = [HasSVE] in { @@ -1265,7 +1277,13 @@ let Predicates = [HasSVEorSME] in { defm ST1H_D_IMM : sve_mem_cst_si<0b01, 0b11, "st1h", Z_d, ZPR64>; defm ST1W_IMM : sve_mem_cst_si<0b10, 0b10, "st1w", Z_s, ZPR32>; defm ST1W_D_IMM : sve_mem_cst_si<0b10, 0b11, "st1w", Z_d, ZPR64>; + let Predicates = [HasSVE2p1] in { + defm ST1W_Q_IMM : sve_mem_cst_si<0b10, 0b00, "st1w", Z_q, ZPR128>; + } defm ST1D_IMM : sve_mem_cst_si<0b11, 0b11, "st1d", Z_d, ZPR64>; + let Predicates = [HasSVE2p1] in { + defm ST1D_Q_IMM : sve_mem_cst_si<0b11, 0b10, "st1d", Z_q, ZPR128>; + } // contiguous store with reg+reg addressing. 
defm ST1B : sve_mem_cst_ss<0b0000, "st1b", Z_b, ZPR8, GPR64NoXZRshifted8>; @@ -1277,7 +1295,13 @@ let Predicates = [HasSVEorSME] in { defm ST1H_D : sve_mem_cst_ss<0b0111, "st1h", Z_d, ZPR64, GPR64NoXZRshifted16>; defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>; defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>; + let Predicates = [HasSVE2p1] in { + defm ST1W_Q : sve_mem_cst_ss<0b1000, "st1w", Z_q, ZPR128, GPR64NoXZRshifted32>; + } defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>; + let Predicates = [HasSVE2p1] in { + defm ST1D_Q : sve_mem_cst_ss<0b1110, "st1d", Z_q, ZPR128, GPR64NoXZRshifted64>; + } } // End HasSVEorSME let Predicates = [HasSVE] in { @@ -3696,3 +3720,31 @@ defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; } // End HasSVE2p1_or_HasSME2 + +//===----------------------------------------------------------------------===// +// SVE2.1 non-widening BFloat16 to BFloat16 instructions +//===----------------------------------------------------------------------===// + +let Predicates = [HasSVE2p1_or_HasSME2p1, HasB16B16] in { +def BFADD_ZZZ : sve_fp_3op_u_zd<0b00, 0b000, "bfadd", ZPR16>; +def BFSUB_ZZZ : sve_fp_3op_u_zd<0b00, 0b001, "bfsub", ZPR16>; +def BFMUL_ZZZ : sve_fp_3op_u_zd<0b00, 0b010, "bfmul", ZPR16>; + +def BFMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, 0b00, "bfmla", ZPR16>; +def BFMLS_ZPmZZ : sve_fp_3op_p_zds_a<0b00, 0b01, "bfmls", ZPR16>; + +def BFADD_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0000, "bfadd", ZPR16>; +def BFSUB_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0001, "bfsub", ZPR16>; +def BFMUL_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0010, "bfmul", ZPR16>; +def BFMAXNM_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0100, "bfmaxnm", ZPR16>; +def BFMINNM_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0101, "bfminnm", ZPR16>; +def BFMAX_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0110, "bfmax", ZPR16>; 
+def BFMIN_ZPZmZ : sve_fp_2op_p_zds<0b00, 0b0111, "bfmin", ZPR16>; + +defm BFMLA_ZZZI : sve2p1_fp_bfma_by_indexed_elem<"bfmla", 0b10>; +defm BFMLS_ZZZI : sve2p1_fp_bfma_by_indexed_elem<"bfmls", 0b11>; + +defm BFMUL_ZZZI : sve2p1_fp_bfmul_by_indexed_elem<"bfmul">; + +def BFCLAMP_ZZZ : sve2p1_fclamp<"bfclamp", 0b00, ZPR16>; +} // End HasSVE2p1_or_HasSME2p1, HasB16B16 diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index c472de8df2f8d..21a0e927d7567 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -18,7 +18,8 @@ def NeoverseN2Model : SchedMachineModel { let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. let CompleteModel = 1; - list UnsupportedFeatures = SMEUnsupported.F; + list UnsupportedFeatures = !listconcat(SMEUnsupported.F, + [HasSVE2p1]); } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 5b3b6c00ed216..4acf8a1bf8603 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -142,6 +142,7 @@ void AArch64Subtarget::initializeProperties() { MaxBytesForLoopAlignment = 8; break; case CortexA710: + case CortexA715: case CortexX2: PrefFunctionLogAlignment = 4; VScaleForTuning = 1; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 15c3961087d1c..4718a01ad2166 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -64,6 +64,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { CortexA78, CortexA78C, CortexA710, + CortexA715, CortexR82, CortexX1, CortexX1C, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 
cbb3d793899a9..3f98d1f00532a 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -975,20 +975,22 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) { static Optional instCombineSVEPTest(InstCombiner &IC, IntrinsicInst &II) { - IntrinsicInst *Op1 = dyn_cast(II.getArgOperand(0)); - IntrinsicInst *Op2 = dyn_cast(II.getArgOperand(1)); + IntrinsicInst *Pg = dyn_cast(II.getArgOperand(0)); + IntrinsicInst *Op = dyn_cast(II.getArgOperand(1)); - if (!Op1 || !Op2) + if (!Pg || !Op) return None; + Intrinsic::ID OpIID = Op->getIntrinsicID(); + IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); - if (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && - Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && - Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { - Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)}; - Type *Tys[] = {Op1->getArgOperand(0)->getType()}; + if (Pg->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && + OpIID == Intrinsic::aarch64_sve_convert_to_svbool && + Pg->getArgOperand(0)->getType() == Op->getArgOperand(0)->getType()) { + Value *Ops[] = {Pg->getArgOperand(0), Op->getArgOperand(0)}; + Type *Tys[] = {Pg->getArgOperand(0)->getType()}; auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops); @@ -999,12 +1001,21 @@ static Optional instCombineSVEPTest(InstCombiner &IC, // Transform PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)). // Later optimizations may rewrite sequence to use the flag-setting variant // of instruction X to remove PTEST. 
- if ((Op1 == Op2) && - (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) && - ((Op1->getIntrinsicID() == Intrinsic::aarch64_sve_brkb_z) || - (Op1->getIntrinsicID() == Intrinsic::aarch64_sve_rdffr_z))) { - Value *Ops[] = {Op1->getArgOperand(0), Op1}; - Type *Tys[] = {Op1->getType()}; + if ((Pg == Op) && (II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) && + ((OpIID == Intrinsic::aarch64_sve_brka_z) || + (OpIID == Intrinsic::aarch64_sve_brkb_z) || + (OpIID == Intrinsic::aarch64_sve_brkpa_z) || + (OpIID == Intrinsic::aarch64_sve_brkpb_z) || + (OpIID == Intrinsic::aarch64_sve_rdffr_z) || + (OpIID == Intrinsic::aarch64_sve_and_z) || + (OpIID == Intrinsic::aarch64_sve_bic_z) || + (OpIID == Intrinsic::aarch64_sve_eor_z) || + (OpIID == Intrinsic::aarch64_sve_nand_z) || + (OpIID == Intrinsic::aarch64_sve_nor_z) || + (OpIID == Intrinsic::aarch64_sve_orn_z) || + (OpIID == Intrinsic::aarch64_sve_orr_z))) { + Value *Ops[] = {Pg->getArgOperand(0), Pg}; + Type *Tys[] = {Pg->getType()}; auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops); PTest->takeName(&II); diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 49d54f8e9a849..cc6bd90b69568 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -70,7 +70,8 @@ enum class RegKind { SVEDataVector, SVEPredicateAsCounter, SVEPredicateVector, - Matrix + Matrix, + LookupTable }; enum class MatrixKind { Array, Tile, Row, Col }; @@ -265,6 +266,7 @@ class AArch64AsmParser : public MCTargetAsmParser { template OperandMatchResultTy tryParseGPROperand(OperandVector &Operands); + OperandMatchResultTy tryParseZTOperand(OperandVector &Operands); template OperandMatchResultTy tryParseSVEDataVector(OperandVector &Operands); template @@ -2786,9 +2788,12 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name, if ((RegNum = matchMatrixRegName(Name))) 
return Kind == RegKind::Matrix ? RegNum : 0; + if (Name.equals_insensitive("zt0")) + return Kind == RegKind::LookupTable ? AArch64::ZT0 : 0; + // The parsed register must be of RegKind Scalar if ((RegNum = MatchRegisterName(Name))) - return Kind == RegKind::Scalar ? RegNum : 0; + return (Kind == RegKind::Scalar) ? RegNum : 0; if (!RegNum) { // Handle a few common aliases of registers. @@ -2824,6 +2829,8 @@ unsigned AArch64AsmParser::getNumRegsForRegKind(RegKind K) { case RegKind::SVEPredicateVector: case RegKind::SVEPredicateAsCounter: return 16; + case RegKind::LookupTable: + return 512; } llvm_unreachable("Unsupported RegKind"); } @@ -3469,6 +3476,7 @@ static const struct Extension { {"sve2-sha3", {AArch64::FeatureSVE2SHA3}}, {"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}}, {"sve2p1", {AArch64::FeatureSVE2p1}}, + {"b16b16", {AArch64::FeatureB16B16}}, {"ls64", {AArch64::FeatureLS64}}, {"xs", {AArch64::FeatureXS}}, {"pauth", {AArch64::FeaturePAuth}}, @@ -3478,6 +3486,7 @@ static const struct Extension { {"sme-f64f64", {AArch64::FeatureSMEF64F64}}, {"sme-i16i64", {AArch64::FeatureSMEI16I64}}, {"sme2", {AArch64::FeatureSME2}}, + {"sme2p1", {AArch64::FeatureSME2p1}}, {"hbc", {AArch64::FeatureHBC}}, {"mops", {AArch64::FeatureMOPS}}, // FIXME: Unsupported extensions @@ -3966,6 +3975,9 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) { if (!tryParseNeonVectorRegister(Operands)) return false; + if (tryParseZTOperand(Operands) == MatchOperand_Success) + return false; + // Otherwise try for a scalar register. 
if (tryParseGPROperand(Operands) == MatchOperand_Success) return false; @@ -4179,6 +4191,10 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands, llvm_unreachable("Expected a valid vector kind"); } + if (RegTok.is(AsmToken::Identifier) && ParseRes == MatchOperand_NoMatch && + RegTok.getString().equals_insensitive("zt0")) + return MatchOperand_NoMatch; + if (RegTok.isNot(AsmToken::Identifier) || ParseRes == MatchOperand_ParseFail || (ParseRes == MatchOperand_NoMatch && NoMatchIsError && @@ -4328,6 +4344,42 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { return MatchOperand_Success; } +OperandMatchResultTy +AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) { + SMLoc StartLoc = getLoc(); + const AsmToken &Tok = getTok(); + std::string Name = Tok.getString().lower(); + + unsigned RegNum = matchRegisterNameAlias(Name, RegKind::LookupTable); + + if (RegNum == 0) + return MatchOperand_NoMatch; + + Operands.push_back(AArch64Operand::CreateReg( + RegNum, RegKind::LookupTable, StartLoc, getLoc(), getContext())); + Lex(); // Eat identifier token. 
+ + // Check if register is followed by an index + if (parseOptionalToken(AsmToken::LBrac)) { + const MCExpr *ImmVal; + if (getParser().parseExpression(ImmVal)) + return MatchOperand_NoMatch; + const MCConstantExpr *MCE = dyn_cast(ImmVal); + if (!MCE) { + TokError("immediate value expected for vector index"); + return MatchOperand_ParseFail; + } + if (parseToken(AsmToken::RBrac, "']' expected")) + return MatchOperand_ParseFail; + + Operands.push_back(AArch64Operand::CreateImm( + MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc, + getLoc(), getContext())); + } + + return MatchOperand_Success; +} + template OperandMatchResultTy AArch64AsmParser::tryParseGPROperand(OperandVector &Operands) { @@ -5434,6 +5486,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, return Error(Loc, "index must be a multiple of 16 in range [-1024, 1008]."); case Match_InvalidMemoryIndexed8UImm5: return Error(Loc, "index must be a multiple of 8 in range [0, 248]."); + case Match_InvalidMemoryIndexed8UImm3: + return Error(Loc, "index must be a multiple of 8 in range [0, 56]."); case Match_InvalidMemoryIndexed4UImm5: return Error(Loc, "index must be a multiple of 4 in range [0, 124]."); case Match_InvalidMemoryIndexed2UImm5: @@ -5762,6 +5816,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, return Error(Loc, "Invalid vector list, expected list with 4 consecutive " "SVE vectors, where the first vector is a multiple of 4 " "and with matching element types"); + case Match_InvalidLookupTable: + return Error(Loc, "Invalid lookup table, expected zt0"); default: llvm_unreachable("unexpected error code!"); } @@ -6176,6 +6232,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidMemoryIndexed8SImm7: case Match_InvalidMemoryIndexed16SImm7: case Match_InvalidMemoryIndexed8UImm5: + case Match_InvalidMemoryIndexed8UImm3: case Match_InvalidMemoryIndexed4UImm5: case Match_InvalidMemoryIndexed2UImm5: case 
Match_InvalidMemoryIndexed1UImm6: @@ -6318,6 +6375,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidSVCR: case Match_InvalidMatrixIndexGPR32_12_15: case Match_InvalidMatrixIndexGPR32_8_11: + case Match_InvalidLookupTable: case Match_InvalidSVEVectorListMul2x8: case Match_InvalidSVEVectorListMul2x16: case Match_InvalidSVEVectorListMul2x32: diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 1438f026f6792..0ef906b583c06 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -329,6 +329,9 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, case AArch64::MPR8RegClassID: MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0)); break; + case AArch64::ZTRRegClassID: + MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZT0)); + break; } } else if (Desc.OpInfo[i].OperandType == AArch64::OPERAND_IMPLICIT_IMM_0) { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 9324ee41c3123..c11150a5a1230 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1244,7 +1244,7 @@ void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, O << ']'; } -template +template void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1576,10 +1576,11 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, printVectorList(MI, OpNum, STI, O, Suffix); } +template void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { - O << "[" << MI->getOperand(OpNum).getImm() << "]"; + O << "[" << Scale * 
MI->getOperand(OpNum).getImm() << "]"; } void AArch64InstPrinter::printMatrixIndex(const MCInst *MI, unsigned OpNum, diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index 7bfb095b5873c..aa4aad1dc7fc7 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -164,6 +164,7 @@ class AArch64InstPrinter : public MCInstPrinter { void printTypedVectorList(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template void printVectorIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printMatrixIndex(const MCInst *MI, unsigned OpNum, diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 98f1c861a5c26..4f2b2d9694ed7 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -1765,82 +1765,176 @@ multiclass sme2_int_mla_long_array_vg4_multi op> { } //===----------------------------------------------------------------------===// -// SME2 multi-vec INT/ FP down convert +class sme2_frint_cvt_vg2_multisz, bits<5>op, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<4> Zn; + bits<4> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b10; + let Inst{19-16} = op{4-1}; + let Inst{15-10} = 0b111000; + let Inst{9-6} = Zn; + let Inst{5} = op{0}; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; +} + +// SME2 multi-vec FP to int convert two registers +// SME2 multi-vec int to FP two registers +multiclass sme2_fp_cvt_vg2_multi op> { + def NAME : sme2_frint_cvt_vg2_multi<0b00, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>; +} + +// SME2 multi-vec FRINT two registers +multiclass sme2_frint_vector_vg2_multi op> { + def _S : 
sme2_frint_cvt_vg2_multi<0b10, op, ZZ_s_mul_r, ZZ_s_mul_r, mnemonic>; +} -class sme2_cvt_vg2_single op, bits<2> is_int> +class sme2_frint_zip_cvt_vg4_multisz, bits<7>op, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<3> Zn; + bits<3> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b11; + let Inst{19-16} = op{6-3}; + let Inst{15-10} = 0b111000; + let Inst{9-7} = Zn; + let Inst{6-5} = op{2-1}; + let Inst{4-2} = Zd; + let Inst{1} = op{0}; + let Inst{0} = 0b0; +} + +// SME2 multi-vec FP to int convert four registers +// SME2 multi-vec int to FP four registers +multiclass sme2_fp_cvt_vg4_multi op> { + def _S : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>; +} + +// SME2 multi-vec quadwords ZIP four registers +multiclass sme2_zip_vector_vg4 op> { + def _B : sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r, + mnemonic>; + def _H : sme2_frint_zip_cvt_vg4_multi<0b01, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r, + mnemonic>; + def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, + mnemonic>; + def _D : sme2_frint_zip_cvt_vg4_multi<0b11, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r, + mnemonic>; +} + +// SME2 multi-vec quadwords ZIP four registers +multiclass sme2_zip_vector_vg4_Q op> { + def NAME: sme2_frint_zip_cvt_vg4_multi<0b00, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r, + mnemonic>; +} + +// SME2 multi-vec FRINT four registers +multiclass sme2_frint_vector_vg4_multi op> { + def _S : sme2_frint_zip_cvt_vg4_multi<0b10, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, + mnemonic>; +} + +class sme2_cvt_vg2_single op> : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<4> Zn; bits<5> Zd; let Inst{31-23} = 0b110000010; - let Inst{22} = op{1}; + let Inst{22} = op{3}; let Inst{21-18} = 0b1000; - let Inst{17-16} = is_int; + let Inst{17-16} = op{2-1}; let Inst{15-10} 
= 0b111000; let Inst{9-6} = Zn; let Inst{5} = op{0}; let Inst{4-0} = Zd; } +// SME2 multi-vec FP down convert two registers +// SME2 multi-vec int down convert two registers +multiclass sme2_cvt_vg2_single op> { + def NAME : sme2_cvt_vg2_single; +} + + +class sme2_unpk_vector_vg2sz, bit u, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<5> Zn; + bits<4> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-10} = 0b100101111000; + let Inst{9-5} = Zn; + let Inst{4-1} = Zd; + let Inst{0} = u; +} + +// SME2 multi-vec unpack two registers +multiclass sme2_unpk_vector_vg2 { + def _H : sme2_unpk_vector_vg2<0b01, u, ZZ_h_mul_r, ZPR8, mnemonic>; + def _S : sme2_unpk_vector_vg2<0b10, u, ZZ_s_mul_r, ZPR16, mnemonic>; + def _D : sme2_unpk_vector_vg2<0b11, u, ZZ_d_mul_r, ZPR32, mnemonic>; +} + -class sme2_cvt_vg4_single op, ZPRRegOp zpr_ty, RegisterOperand vector_ty, - string mnemonic> - : I<(outs zpr_ty:$Zd), (ins vector_ty:$Zn), +class sme2_cvt_vg4_single op, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<3> Zn; bits<5> Zd; let Inst{31-24} = 0b11000001; let Inst{23} = sz; let Inst{22} = op{2}; - let Inst{21-16} = 0b110011; - let Inst{15-10} = 0b111000; + let Inst{21-10} = 0b110011111000; let Inst{9-7} = Zn; let Inst{6-5} = op{1-0}; let Inst{4-0} = Zd; } - +// SME2 multi-vec int down convert four registers multiclass sme2_int_cvt_vg4_single op> { -def _StoB : sme2_cvt_vg4_single<0b0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>; -def _DtoH : sme2_cvt_vg4_single<0b1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>; +def _StoB : sme2_cvt_vg4_single<0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>; +def _DtoH : sme2_cvt_vg4_single<1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>; } -class sme2_fp_cvt_vg2_multi op> - : I<(outs ZZ_s_mul_r:$Zd), (ins 
ZZ_s_mul_r:$Zn), - mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { +class sme2_unpk_vector_vg4sz, bit u, RegisterOperand first_ty, + RegisterOperand second_ty, string mnemonic> + : I<(outs first_ty:$Zd), (ins second_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<4> Zn; - bits<4> Zd; - let Inst{31-18} = 0b11000001001000; - let Inst{17-16} = op{2-1}; - let Inst{15-10} = 0b111000; + bits<3> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-10} = 0b110101111000; let Inst{9-6} = Zn; - let Inst{5} = op{0}; - let Inst{4-1} = Zd; - let Inst{0} = 0b0; + let Inst{5} = 0b0; + let Inst{4-2} = Zd; + let Inst{1} = 0b0; + let Inst{0} = u; } - -class sme2_fp_cvt_vg4_multi op> - : I<(outs ZZZZ_s_mul_r:$Zd), (ins ZZZZ_s_mul_r:$Zn), - mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { - bits<3> Zn; - bits<3> Zd; - let Inst{31-18} = 0b11000001001100; - let Inst{17-16} = op{2-1}; - let Inst{15-10} = 0b111000; - let Inst{9-7} = Zn; - let Inst{6} = 0b0; - let Inst{5} = op{0}; - let Inst{4-2} = Zd; - let Inst{1-0} = 0b00; +// SME2 multi-vec UNPK four registers +multiclass sme2_unpk_vector_vg4 { + def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>; + def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>; + def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>; } //===----------------------------------------------------------------------===// // SME2 multi-vec CLAMP registers -class sme2_clamp_vector_vg24_multi sz, bits<2> op1, bit u, - RegisterOperand multi_vector_ty, - ZPRRegOp vector_ty, string mnemonic> +class sme2_zip_clamp_vector_vg24_multi sz, bits<3> op1, bit u, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, string mnemonic> : I<(outs multi_vector_ty:$Zd), (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm), mnemonic, "\t$Zd, $Zn, $Zm", @@ -1851,40 +1945,48 @@ class sme2_clamp_vector_vg24_multi sz, bits<2> op1, bit u, let Inst{23-22} = sz; let Inst{21} = 0b1; let Inst{20-16} 
= Zm; - let Inst{15-12} = 0b1100; - let Inst{11-10} = op1; + let Inst{15-13} = 0b110; + let Inst{12-10} = op1; let Inst{9-5} = Zn; let Inst{0} = u; let Constraints = "$Zd = $_Zd"; } -class sme2_clamp_vector_vg2_multi sz, bits<2> op1, bit u, - RegisterOperand multi_vector_ty, - ZPRRegOp vector_ty, string mnemonic> - : sme2_clamp_vector_vg24_multi sz, bits<3> op1, bit u, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, string mnemonic> + : sme2_zip_clamp_vector_vg24_multi{ bits<4> Zd; let Inst{4-1} = Zd; } multiclass sme2_fp_clamp_vector_vg2_multi{ - def _H : sme2_clamp_vector_vg2_multi<0b01, 0b00, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>; - def _S : sme2_clamp_vector_vg2_multi<0b10, 0b00, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>; - def _D : sme2_clamp_vector_vg2_multi<0b11, 0b00, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>; + def _H : sme2_zip_clamp_vector_vg2_multi<0b01, 0b000, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_zip_clamp_vector_vg2_multi<0b10, 0b000, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_zip_clamp_vector_vg2_multi<0b11, 0b000, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>; } multiclass sme2_int_clamp_vector_vg2_multi{ - def _B : sme2_clamp_vector_vg2_multi<0b00, 0b01, u, ZZ_b_mul_r, ZPR8, mnemonic>; - def _H : sme2_clamp_vector_vg2_multi<0b01, 0b01, u, ZZ_h_mul_r, ZPR16, mnemonic>; - def _S : sme2_clamp_vector_vg2_multi<0b10, 0b01, u, ZZ_s_mul_r, ZPR32, mnemonic>; - def _D : sme2_clamp_vector_vg2_multi<0b11, 0b01, u, ZZ_d_mul_r, ZPR64, mnemonic>; + def _B : sme2_zip_clamp_vector_vg2_multi<0b00, 0b001, u, ZZ_b_mul_r, ZPR8, mnemonic>; + def _H : sme2_zip_clamp_vector_vg2_multi<0b01, 0b001, u, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_zip_clamp_vector_vg2_multi<0b10, 0b001, u, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_zip_clamp_vector_vg2_multi<0b11, 0b001, u, ZZ_d_mul_r, ZPR64, mnemonic>; } -class sme2_clamp_vector_vg4_multi sz, bits<2> op1, bit u, +multiclass sme2_zip_vector_vg2 { + def _B : sme2_zip_clamp_vector_vg2_multi<0b00, 0b100, op, ZZ_b_mul_r, 
ZPR8, mnemonic>; + def _H : sme2_zip_clamp_vector_vg2_multi<0b01, 0b100, op, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_zip_clamp_vector_vg2_multi<0b10, 0b100, op, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_zip_clamp_vector_vg2_multi<0b11, 0b100, op, ZZ_d_mul_r, ZPR64, mnemonic>; + def _Q : sme2_zip_clamp_vector_vg2_multi<0b00, 0b101, op, ZZ_q_mul_r, ZPR128, mnemonic>; +} + +class sme2_clamp_vector_vg4_multi sz, bits<3> op1, bit u, RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, string mnemonic> - : sme2_clamp_vector_vg24_multi{ bits<3> Zd; let Inst{4-2} = Zd; @@ -1892,16 +1994,16 @@ class sme2_clamp_vector_vg4_multi sz, bits<2> op1, bit u, } multiclass sme2_fp_clamp_vector_vg4_multi{ - def _H : sme2_clamp_vector_vg4_multi<0b01, 0b10, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>; - def _S : sme2_clamp_vector_vg4_multi<0b10, 0b10, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>; - def _D : sme2_clamp_vector_vg4_multi<0b11, 0b10, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>; + def _H : sme2_clamp_vector_vg4_multi<0b01, 0b010, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_clamp_vector_vg4_multi<0b10, 0b010, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_clamp_vector_vg4_multi<0b11, 0b010, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>; } multiclass sme2_int_clamp_vector_vg4_multi{ - def _B : sme2_clamp_vector_vg4_multi<0b00, 0b11, u, ZZZZ_b_mul_r, ZPR8, mnemonic>; - def _H : sme2_clamp_vector_vg4_multi<0b01, 0b11, u, ZZZZ_h_mul_r, ZPR16, mnemonic>; - def _S : sme2_clamp_vector_vg4_multi<0b10, 0b11, u, ZZZZ_s_mul_r, ZPR32, mnemonic>; - def _D : sme2_clamp_vector_vg4_multi<0b11, 0b11, u, ZZZZ_d_mul_r, ZPR64, mnemonic>; + def _B : sme2_clamp_vector_vg4_multi<0b00, 0b011, u, ZZZZ_b_mul_r, ZPR8, mnemonic>; + def _H : sme2_clamp_vector_vg4_multi<0b01, 0b011, u, ZZZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_clamp_vector_vg4_multi<0b10, 0b011, u, ZZZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_clamp_vector_vg4_multi<0b11, 0b011, u, ZZZZ_d_mul_r, ZPR64, mnemonic>; } 
//===----------------------------------------------------------------------===// @@ -2379,3 +2481,192 @@ multiclass sme2_int_mopx_tile op> { multiclass sme2_bfp_mopx_tile op> { def NAME : sme_outer_product_widening_inst; } + +//===----------------------------------------------------------------------===/// +// SME2 Zero Lookup Table. +class sme2_zero_zt opc> + : I<(outs ZTR:$ZT), (ins ), + mnemonic, "\t\\{ $ZT \\}", + "", []>, Sched<[]> { + let Inst{31-4} = 0b1100000001001000000000000000; + let Inst{3-0} = opc; +} + +//===----------------------------------------------------------------------===// +// SME2 lookup table load/store +class sme2_spill_fill_vector opc> + : I, Sched<[]> { + bits<5> Rn; + let Inst{31-22} = 0b1110000100; + let Inst{21-16} = opc{7-2}; + let Inst{15-10} = 0b100000; + let Inst{9-5} = Rn; + let Inst{4-2} = 0b000; + let Inst{1-0} = opc{1-0}; + + let mayLoad = !not(opc{7}); + let mayStore = opc{7}; +} + +//===----------------------------------------------------------------------===/// +// SME2 move to/from lookup table +class sme2_movt_zt_to_scalar opc> + : I<(outs GPR64:$Rt), (ins ZTR:$ZTt, uimm3s8:$imm3), + mnemonic, "\t$Rt, $ZTt$imm3", + "", []>, Sched<[]> { + bits<3> imm3; + bits<5> Rt; + let Inst{31-15} = 0b11000000010011000; + let Inst{14-12} = imm3; + let Inst{11-5} = opc; + let Inst{4-0} = Rt; +} + +class sme2_movt_scalar_to_zt opc> + : I<(outs ZTR:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt), + mnemonic, "\t$ZTt$imm3, $Rt", + "", []>, Sched<[]> { + bits<3> imm3; + bits<5> Rt; + let Inst{31-15} = 0b11000000010011100; + let Inst{14-12} = imm3; + let Inst{11-5} = opc; + let Inst{4-0} = Rt; +} + +//===----------------------------------------------------------------------===// +// SME2 lookup table expand one register +class sme2_luti_vector_index sz, bits<7> opc, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", 
[]>, Sched<[]> { + bits<5> Zn; + bits<5> Zd; + let Inst{31-19} = 0b1100000011001; + let Inst{18-14} = opc{6-2}; + let Inst{13-12} = sz; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +class sme2_luti2_vector_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_index { + bits<4> i; + let Inst{17-14} = i; +} + +multiclass sme2_luti2_vector_index { + def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>; + def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>; + def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>; +} + +class sme2_luti4_vector_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_index { + bits<3> i; + let Inst{16-14} = i; +} + +multiclass sme2_luti4_vector_index { + def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>; + def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>; + def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>; +} + +// SME2 lookup table expand two contiguous registers +class sme2_luti_vector_vg2_index sz, bits<6> opc, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", []>, Sched<[]> { + bits<5> Zn; + bits<4> Zd; + let Inst{31-19} = 0b1100000010001; + let Inst{18-15} = opc{5-2}; + let Inst{14} = 0b1; + let Inst{13-12} = sz; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = Zn; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; +} + +class sme2_luti2_vector_vg2_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg2_index { + bits<3> i; + let Inst{17-15} = i; +} + +multiclass sme2_luti2_vector_vg2_index { + def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; + def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; + def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; +} + +class sme2_luti4_vector_vg2_index sz, RegisterOperand vector_ty, + string mnemonic> 
+ : sme2_luti_vector_vg2_index { + bits<2> i; + let Inst{16-15} = i; +} + +multiclass sme2_luti4_vector_vg2_index { + def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; + def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; + def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; +} + +// SME2 lookup table expand four contiguous registers +class sme2_luti_vector_vg4_index sz, bits<5>opc, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZTR:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", []>, Sched<[]> { + bits<5> Zn; + bits<3> Zd; + let Inst{31-19} = 0b1100000010001; + let Inst{18-16} = opc{4-2}; + let Inst{15-14} = 0b10; + let Inst{13-12} = sz; + let Inst{11-10} = opc{1-0}; + let Inst{9-5} = Zn; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +class sme2_luti2_vector_vg4_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg4_index { + bits<2> i; + let Inst{17-16} = i; +} + +multiclass sme2_luti2_vector_vg4_index { + def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>; + def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; +} + +class sme2_luti4_vector_vg4_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg4_index { + bits<1> i; + let Inst{16} = i; +} + +multiclass sme2_luti4_vector_vg4_index { + def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; +} + diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 01ef367fef752..f9d6abdd52929 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2105,19 +2105,18 @@ class sve_fp_3op_p_zds_a sz, bits<2> opc, string asm, ZPRRegOp zprty> let Constraints = "$Zda 
= $_Zda"; let ElementSize = zprty.ElementSize; + let DestructiveInstType = DestructiveTernaryCommWithRev; } multiclass sve_fp_3op_p_zds_a opc, string asm, string Ps, SDPatternOperator op, string revname, bit isReverseInstr=0> { - let DestructiveInstType = DestructiveTernaryCommWithRev in { def _H : sve_fp_3op_p_zds_a<0b01, opc, asm, ZPR16>, SVEPseudo2Instr, SVEInstr2Rev; def _S : sve_fp_3op_p_zds_a<0b10, opc, asm, ZPR32>, SVEPseudo2Instr, SVEInstr2Rev; def _D : sve_fp_3op_p_zds_a<0b11, opc, asm, ZPR64>, SVEPseudo2Instr, SVEInstr2Rev; - } def : SVE_4_Op_Pat(NAME # _H)>; def : SVE_4_Op_Pat(NAME # _S)>; @@ -2173,7 +2172,7 @@ multiclass sve_fp_3op_p_zds_zx { // SVE Floating Point Multiply-Add - Indexed Group //===----------------------------------------------------------------------===// -class sve_fp_fma_by_indexed_elem sz, bit opc, string asm, +class sve_fp_fma_by_indexed_elem sz, bits<2> opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2, Operand itype> : I<(outs zprty1:$Zda), (ins zprty1:$_Zda, zprty1:$Zn, zprty2:$Zm, itype:$iop), @@ -2183,8 +2182,8 @@ class sve_fp_fma_by_indexed_elem sz, bit opc, string asm, let Inst{31-24} = 0b01100100; let Inst{23-22} = sz; let Inst{21} = 0b1; - let Inst{15-11} = 0; - let Inst{10} = opc; + let Inst{15-12} = 0b0000; + let Inst{11-10} = opc; let Inst{9-5} = Zn; let Inst{4-0} = Zda; @@ -2193,7 +2192,18 @@ class sve_fp_fma_by_indexed_elem sz, bit opc, string asm, let ElementSize = ElementSizeNone; } -multiclass sve_fp_fma_by_indexed_elem opc> { + def NAME : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, + VectorIndexH32b> { + bits<3> Zm; + bits<3> iop; + let Inst{22} = iop{2}; + let Inst{20-19} = iop{1-0}; + let Inst{18-16} = Zm; + } +} + +multiclass sve_fp_fma_by_indexed_elem opc, string asm, SDPatternOperator op> { def _H : sve_fp_fma_by_indexed_elem<{0, ?}, opc, asm, ZPR16, ZPR3b16, VectorIndexH32b> { bits<3> Zm; @@ -2228,8 +2238,8 @@ multiclass sve_fp_fma_by_indexed_elem sz, string asm, ZPRRegOp zprty, - ZPRRegOp 
zprty2, Operand itype> +class sve_fp_fmul_by_indexed_elem sz, bit o2, string asm, ZPRRegOp zprty, + ZPRRegOp zprty2, Operand itype> : I<(outs zprty:$Zd), (ins zprty:$Zn, zprty2:$Zm, itype:$iop), asm, "\t$Zd, $Zn, $Zm$iop", "", []>, Sched<[]> { bits<5> Zd; @@ -2237,26 +2247,38 @@ class sve_fp_fmul_by_indexed_elem sz, string asm, ZPRRegOp zprty, let Inst{31-24} = 0b01100100; let Inst{23-22} = sz; let Inst{21} = 0b1; - let Inst{15-10} = 0b001000; + let Inst{15-12} = 0b0010; + let Inst{11} = o2; + let Inst{10} = 0b0; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } +multiclass sve2p1_fp_bfmul_by_indexed_elem { + def NAME : sve_fp_fmul_by_indexed_elem<{0, ?}, 0b1, asm, ZPR16, ZPR3b16, VectorIndexH32b> { + bits<3> Zm; + bits<3> iop; + let Inst{22} = iop{2}; + let Inst{20-19} = iop{1-0}; + let Inst{18-16} = Zm; + } +} + multiclass sve_fp_fmul_by_indexed_elem { - def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, asm, ZPR16, ZPR3b16, VectorIndexH32b> { + def _H : sve_fp_fmul_by_indexed_elem<{0, ?}, 0b0, asm, ZPR16, ZPR3b16, VectorIndexH32b> { bits<3> Zm; bits<3> iop; let Inst{22} = iop{2}; let Inst{20-19} = iop{1-0}; let Inst{18-16} = Zm; } - def _S : sve_fp_fmul_by_indexed_elem<0b10, asm, ZPR32, ZPR3b32, VectorIndexS32b> { + def _S : sve_fp_fmul_by_indexed_elem<0b10, 0b0, asm, ZPR32, ZPR3b32, VectorIndexS32b> { bits<3> Zm; bits<2> iop; let Inst{20-19} = iop; let Inst{18-16} = Zm; } - def _D : sve_fp_fmul_by_indexed_elem<0b11, asm, ZPR64, ZPR4b64, VectorIndexD32b> { + def _D : sve_fp_fmul_by_indexed_elem<0b11, 0b0, asm, ZPR64, ZPR4b64, VectorIndexD32b> { bits<4> Zm; bit iop; let Inst{20} = iop; @@ -9199,3 +9221,65 @@ multiclass sve_mem_sst_128b_64_unscaled { def : InstAlias(NAME) Z_q:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, XZR), 1>; } + + +// SVE contiguous load (quadwords, scalar plus immediate) +class sve_mem_128b_cld_si dtype, string mnemonic> + : I<(outs Z_q:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), + mnemonic, "\t$Zt, $Pg/z, [$Rn, $imm4, mul vl]", + "", []>, Sched<[]> { + 
bits<5> Zt; + bits<5> Rn; + bits<3> Pg; + bits<4> imm4; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = dtype; + let Inst{22-20} = 0b001; + let Inst{19-16} = imm4; + let Inst{15-13} = 0b001; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_128b_cld_si dtype, string mnemonic> { + def NAME : sve_mem_128b_cld_si; + + def : InstAlias(NAME) Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 1>; + def : InstAlias(NAME) ZPR128:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, 0), 0>; + def : InstAlias(NAME) ZPR128:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s1:$imm4), 0>; +} + + +// SVE contiguous load (quadwords, scalar plus scalar) +class sve_mem_128b_cld_ss dtype, string mnemonic, RegisterOperand gprsh_ty> + : I<(outs Z_q:$Zt), (ins PPR3bAny:$Pg, GPR64sp:$Rn, gprsh_ty:$Rm), + mnemonic, "\t$Zt, $Pg/z, [$Rn, $Rm]", "", + []>, Sched<[]> { + bits<5> Zt; + bits<5> Rn; + bits<3> Pg; + bits<5> Rm; + let Inst{31-25} = 0b1010010; + let Inst{24-23} = dtype; + let Inst{22-21} = 0b00; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b100; + let Inst{12-10} = Pg; + let Inst{9-5} = Rn; + let Inst{4-0} = Zt; + + let mayLoad = 1; +} + +multiclass sve_mem_128b_cld_ss dtype, string mnemonic, RegisterOperand gprsh_ty> { + def NAME : sve_mem_128b_cld_ss; + + def : InstAlias(NAME) ZPR128:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprsh_ty:$Rm), 0>; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index aa36045491701..c2b084bc0779d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -153,6 +153,10 @@ def gi_smrd_buffer_sgpr_imm : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_vop3_mad_mix_mods : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + // Separate load nodes are defined to glue m0 initialization in // SelectionDAG. 
The GISel selector can just insert m0 initialization // directly before selecting a glue-less load, so hide this diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 0a68966935105..2538d175dde2a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -522,6 +522,60 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { return true; } +bool AMDGPUInstructionSelector::selectG_FMA_FMAD(MachineInstr &I) const { + assert(I.getOpcode() == AMDGPU::G_FMA || I.getOpcode() == AMDGPU::G_FMAD); + + // Try to manually select MAD_MIX/FMA_MIX. + Register Dst = I.getOperand(0).getReg(); + LLT ResultTy = MRI->getType(Dst); + bool IsFMA = I.getOpcode() == AMDGPU::G_FMA; + if (ResultTy != LLT::scalar(32) || + (IsFMA ? !Subtarget->hasFmaMixInsts() : !Subtarget->hasMadMixInsts())) + return false; + + // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand + // using the conversion from f16. + bool MatchedSrc0, MatchedSrc1, MatchedSrc2; + auto [Src0, Src0Mods] = + selectVOP3PMadMixModsImpl(I.getOperand(1), MatchedSrc0); + auto [Src1, Src1Mods] = + selectVOP3PMadMixModsImpl(I.getOperand(2), MatchedSrc1); + auto [Src2, Src2Mods] = + selectVOP3PMadMixModsImpl(I.getOperand(3), MatchedSrc2); + +#ifndef NDEBUG + const SIMachineFunctionInfo *MFI = + I.getMF()->getInfo(); + AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode(); + assert((IsFMA || !Mode.allFP32Denormals()) && + "fmad selected with denormals enabled"); +#endif + + // TODO: We can select this with f32 denormals enabled if all the sources are + // converted from f16 (in which case fmad isn't legal). + if (!MatchedSrc0 && !MatchedSrc1 && !MatchedSrc2) + return false; + + const unsigned OpC = IsFMA ? 
AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32; + MachineInstr *MixInst = + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpC), Dst) + .addImm(Src0Mods) + .addReg(Src0) + .addImm(Src1Mods) + .addReg(Src1) + .addImm(Src2Mods) + .addReg(Src2) + .addImm(0) + .addImm(0) + .addImm(0); + + if (!constrainSelectedInstRegOperands(*MixInst, TII, TRI, RBI)) + return false; + + I.eraseFromParent(); + return true; +} + bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const { MachineBasicBlock *BB = MI.getParent(); Register DstReg = MI.getOperand(0).getReg(); @@ -3228,6 +3282,11 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { return selectG_FABS(I); case TargetOpcode::G_EXTRACT: return selectG_EXTRACT(I); + case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: + if (selectG_FMA_FMAD(I)) + return true; + return selectImpl(I, *CoverageInfo); case TargetOpcode::G_MERGE_VALUES: case TargetOpcode::G_CONCAT_VECTORS: return selectG_MERGE_VALUES(I); @@ -4679,6 +4738,137 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const { [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}}; } +// Variant of stripBitCast that returns the instruction instead of a +// MachineOperand. +static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) { + if (MI->getOpcode() == AMDGPU::G_BITCAST) + return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI); + return MI; +} + +// Figure out if this is really an extract of the high 16-bits of a dword, +// returns nullptr if it isn't. 
+static MachineInstr *isExtractHiElt(MachineInstr *Inst, + MachineRegisterInfo &MRI) { + Inst = stripBitCast(Inst, MRI); + + if (Inst->getOpcode() != AMDGPU::G_TRUNC) + return nullptr; + + MachineInstr *TruncOp = + getDefIgnoringCopies(Inst->getOperand(1).getReg(), MRI); + TruncOp = stripBitCast(TruncOp, MRI); + + // G_LSHR x, (G_CONSTANT i32 16) + if (TruncOp->getOpcode() == AMDGPU::G_LSHR) { + auto SrlAmount = getIConstantVRegValWithLookThrough( + TruncOp->getOperand(2).getReg(), MRI); + if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) { + MachineInstr *SrlOp = + getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI); + return stripBitCast(SrlOp, MRI); + } + } + + // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0) + // 1, 0 swaps the low/high 16 bits. + // 1, 1 sets the high 16 bits to be the same as the low 16. + // in any case, it selects the high elts. + if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) { + assert(MRI.getType(TruncOp->getOperand(0).getReg()) == + LLT::fixed_vector(2, 16)); + + ArrayRef Mask = TruncOp->getOperand(3).getShuffleMask(); + assert(Mask.size() == 2); + + if (Mask[0] == 1 && Mask[1] <= 1) { + MachineInstr *LHS = + getDefIgnoringCopies(TruncOp->getOperand(1).getReg(), MRI); + return stripBitCast(LHS, MRI); + } + } + + return nullptr; +} + +std::pair +AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root, + bool &Matched) const { + Matched = false; + + Register Src; + unsigned Mods; + std::tie(Src, Mods) = selectVOP3ModsImpl(Root); + + MachineInstr *MI = getDefIgnoringCopies(Src, *MRI); + if (MI->getOpcode() == AMDGPU::G_FPEXT) { + MachineOperand *MO = &MI->getOperand(1); + Src = MO->getReg(); + MI = getDefIgnoringCopies(Src, *MRI); + + assert(MRI->getType(Src) == LLT::scalar(16)); + + // See through bitcasts. + // FIXME: Would be nice to use stripBitCast here. 
+ if (MI->getOpcode() == AMDGPU::G_BITCAST) { + MO = &MI->getOperand(1); + Src = MO->getReg(); + MI = getDefIgnoringCopies(Src, *MRI); + } + + const auto CheckAbsNeg = [&]() { + // Be careful about folding modifiers if we already have an abs. fneg is + // applied last, so we don't want to apply an earlier fneg. + if ((Mods & SISrcMods::ABS) == 0) { + unsigned ModsTmp; + std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO); + MI = getDefIgnoringCopies(Src, *MRI); + + if ((ModsTmp & SISrcMods::NEG) != 0) + Mods ^= SISrcMods::NEG; + + if ((ModsTmp & SISrcMods::ABS) != 0) + Mods |= SISrcMods::ABS; + } + }; + + CheckAbsNeg(); + + // op_sel/op_sel_hi decide the source type and source. + // If the source's op_sel_hi is set, it indicates to do a conversion from + // fp16. If the sources's op_sel is set, it picks the high half of the + // source register. + + Mods |= SISrcMods::OP_SEL_1; + + if (MachineInstr *ExtractHiEltMI = isExtractHiElt(MI, *MRI)) { + Mods |= SISrcMods::OP_SEL_0; + MI = ExtractHiEltMI; + MO = &MI->getOperand(0); + Src = MO->getReg(); + + CheckAbsNeg(); + } + + Matched = true; + } + + return {Src, Mods}; +} + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const { + Register Src; + unsigned Mods; + bool Matched; + std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched); + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods + }}; +} + void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index e444370fdd070..f48976953fdd5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -97,6 +97,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector { bool 
selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const; bool selectG_AMDGPU_MAD_64_32(MachineInstr &I) const; bool selectG_EXTRACT(MachineInstr &I) const; + bool selectG_FMA_FMAD(MachineInstr &I) const; bool selectG_MERGE_VALUES(MachineInstr &I) const; bool selectG_UNMERGE_VALUES(MachineInstr &I) const; bool selectG_BUILD_VECTOR(MachineInstr &I) const; @@ -293,6 +294,10 @@ class AMDGPUInstructionSelector final : public InstructionSelector { ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const; ComplexRendererFns selectSMRDBufferSgprImm(MachineOperand &Root) const; + std::pair selectVOP3PMadMixModsImpl(MachineOperand &Root, + bool &Matched) const; + ComplexRendererFns selectVOP3PMadMixMods(MachineOperand &Root) const; + void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx = -1) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index dfa3c0af6526c..79dc60c93f403 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5035,7 +5035,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // s16 -> <2 x s16>, and <3 x s16> -> <4 x s16>, LLT RoundedTy; - // S32 vector to to cover all data, plus TFE result element. + // S32 vector to cover all data, plus TFE result element. LLT TFETy; // Register type to use for each loaded component. Will be S32 or V2S16. 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp index 1b0d7bb43c80f..9dbcb548f8476 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/ModRef.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/CommandLine.h" @@ -992,7 +993,8 @@ FunctionCallee AMDGPULibFunc::getOrInsertFunction(Module *M, } else { AttributeList Attr; LLVMContext &Ctx = M->getContext(); - Attr = Attr.addFnAttribute(Ctx, Attribute::ReadOnly); + Attr = Attr.addFnAttribute( + Ctx, Attribute::getWithMemoryEffects(Ctx, MemoryEffects::readOnly())); Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind); C = M->getOrInsertFunction(FuncName, FuncTy, Attr); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index f3310a6ec3684..483c7037acf34 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -857,6 +857,27 @@ bool GCNTTIImpl::useGPUDivergenceAnalysis() const { return !UseLegacyDA; } +bool GCNTTIImpl::isReadRegisterSourceOfDivergence( + const IntrinsicInst *ReadReg) const { + Metadata *MD = + cast(ReadReg->getArgOperand(0))->getMetadata(); + StringRef RegName = + cast(cast(MD)->getOperand(0))->getString(); + + // Special case registers that look like VCC. + MVT VT = MVT::getVT(ReadReg->getType()); + if (VT == MVT::i1) + return true; + + // Special case scalar registers that start with 'v'. + if (RegName.startswith("vcc") || RegName.empty()) + return false; + + // VGPR or AGPR is divergent. There aren't any specially named vector + // registers. 
+ return RegName[0] == 'v' || RegName[0] == 'a'; +} + /// \returns true if the result of the value could potentially be /// different across workitems in a wavefront. bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const { @@ -880,8 +901,12 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const { if (isa(V) || isa(V)) return true; - if (const IntrinsicInst *Intrinsic = dyn_cast(V)) + if (const IntrinsicInst *Intrinsic = dyn_cast(V)) { + if (Intrinsic->getIntrinsicID() == Intrinsic::read_register) + return isReadRegisterSourceOfDivergence(Intrinsic); + return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID()); + } // Assume all function calls are a source of divergence. if (const CallInst *CI = dyn_cast(V)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 4ee785f83ba24..fb54cfd09da30 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -162,6 +162,8 @@ class GCNTTIImpl final : public BasicTTIImplBase { using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); + + bool isReadRegisterSourceOfDivergence(const IntrinsicInst *ReadReg) const; bool isSourceOfDivergence(const Value *V) const; bool isAlwaysUniform(const Value *V) const; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index c32c56b1c8f32..c1eb61f2f4ac2 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3507,7 +3507,7 @@ bool AMDGPUAsmParser::validateConstantBusLimitations( return true; // Check special imm operands (used by madmk, etc) - if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { + if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) { ++NumLiterals; LiteralSize = 4; } @@ -7897,7 +7897,7 @@ void 
AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); if (IsGFX10Plus) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::tfe)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); if (!IsGFX10Plus) @@ -8205,9 +8205,9 @@ void cvtVOP3DstOpSelOnly(MCInst &Inst) { const int Ops[] = { AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2 }; - for (SrcNum = 0; - SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; - ++SrcNum); + for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); + ++SrcNum) + ; assert(SrcNum > 0); unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); @@ -8268,17 +8268,17 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) } } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyHigh); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyClampSI); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOModSI); } void 
AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) @@ -8351,7 +8351,7 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers)) { // This instruction has src modifiers for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); @@ -8377,13 +8377,13 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, } } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyClampSI); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); - } + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyOModSI); // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): // it has src2 register operand that is tied to dst operand @@ -8427,7 +8427,7 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, Inst.addOperand(Inst.getOperand(0)); } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { assert(!IsPacked); Inst.addOperand(Inst.getOperand(0)); } @@ -8885,7 +8885,7 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, unsigned Opc = Inst.getOpcode(); const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); bool HasModifiers = - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; + AMDGPU::hasNamedOperand(Opc, 
AMDGPU::OpName::src0_modifiers); // MAC instructions are special because they have 'old' // operand which is not tied to dst (but assumed to be). @@ -8943,17 +8943,17 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, llvm_unreachable("unhandled operand type"); } } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - } - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); - } + if (Desc.TSFlags & SIInstrFlags::VOP3P) cvtVOP3P(Inst, Operands, OptionalIdx); else if (Desc.TSFlags & SIInstrFlags::VOP3) cvtVOP3OpSel(Inst, Operands, OptionalIdx); - else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); } @@ -8966,9 +8966,10 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); - } + + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) + addOptionalImmOperand(Inst, Operands, OptionalIdx, + AMDGPUOperand::ImmTyDppFi); } } @@ -8977,7 +8978,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I unsigned Opc = Inst.getOpcode(); bool HasModifiers = - AMDGPU::getNamedOperandIdx(Opc, 
AMDGPU::OpName::src0_modifiers) != -1; + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers); unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { @@ -9038,7 +9039,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); } } @@ -9180,41 +9181,38 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, SkippedVcc = false; } - if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && - Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && - Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { + const unsigned Opc = Inst.getOpcode(); + if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && + Opc != AMDGPU::V_NOP_sdwa_vi) { // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::clamp) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - } - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::omod) != -1) { + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); - } - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::dst_sel) != -1) { + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) addOptionalImmOperand(Inst, 
Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); - } - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::dst_unused) != -1) { + + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); - } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; case SIInstrFlags::VOP2: addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); - } + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); @@ -9222,7 +9220,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, break; case SIInstrFlags::VOPC: - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 43a1dfc7f561b..3969e8cf451c5 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ 
b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -740,7 +740,7 @@ DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const { DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const { if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] || STI.getFeatureBits()[AMDGPU::FeatureGFX10]) { - if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst) != -1) + if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst)) // VOPC - insert clamp insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp); } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) { @@ -804,7 +804,7 @@ bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const { if (OldIdx != -1 && Desc.getOperandConstraint( OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) { - assert(AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2) != -1); + assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2)); assert(Desc.getOperandConstraint( AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2), MCOI::OperandConstraint::TIED_TO) == DST_IDX); @@ -838,19 +838,19 @@ DecodeStatus AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const { unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { auto Mods = collectVOPModifiers(MI); insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), AMDGPU::OpName::op_sel); } else { // Insert dummy unused src modifiers. 
if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src0_modifiers); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src1_modifiers); } @@ -865,7 +865,7 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const { unsigned Opc = MI.getOpcode(); unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { auto Mods = collectVOPModifiers(MI); insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), AMDGPU::OpName::op_sel); @@ -900,9 +900,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const { assert(VDataIdx != -1); if (BaseOpcode->BVH) { // Add A16 operand for intersect_ray instructions - if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) { + if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16)) addOperand(MI, MCOperand::createImm(1)); - } return MCDisassembler::Success; } @@ -1020,23 +1019,23 @@ DecodeStatus AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const { auto Mods = collectVOPModifiers(MI, true); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel), 
AMDGPU::OpName::op_sel); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi), AMDGPU::OpName::op_sel_hi); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo), AMDGPU::OpName::neg_lo); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi)) insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi), AMDGPU::OpName::neg_hi); @@ -1049,16 +1048,16 @@ DecodeStatus AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const { unsigned DescNumOps = MCII->get(Opc).getNumOperands(); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old)) insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src0_modifiers); if (MI.getNumOperands() < DescNumOps && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers) != -1) + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers)) insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::src1_modifiers); return MCDisassembler::Success; diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td index a9a3421e81924..3d4f8d52fdc69 100644 --- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td @@ -310,14 +310,6 @@ multiclass 
AtomicPat; } -multiclass AtomicIncDecPat { - // FIXME: Add _RTN version. We need per WI scratch location to store the old value - // EXTRACT_SUBREG here is dummy, we know the node has no uses - def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, C)), - (EXTRACT_SUBREG (inst_noret - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (MOV_IMM_I32 -1), sub0), $ptr), sub1)>; -} // CMPSWAP is pattern is special // EXTRACT_SUBREG here is dummy, we know the node has no uses @@ -349,14 +341,6 @@ defm AtomicOrPat : AtomicPat ; defm AtomicXorPat : AtomicPat ; -defm AtomicIncAddPat : AtomicIncDecPat ; -defm AtomicIncSubPat : AtomicIncDecPat ; -defm AtomicDecAddPat : AtomicIncDecPat ; -defm AtomicDecSubPat : AtomicIncDecPat ; // Should be predicated on FeatureFP64 // def FMA_64 : R600_3OP < diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index de245ef57def7..be4b477547ad3 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -272,8 +272,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, (0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod0->getImm()); ++NumOperands; - } else if (AMDGPU::getNamedOperandIdx(DPPOp, - AMDGPU::OpName::src0_modifiers) != -1) { + } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src0_modifiers)) { DPPInst.addImm(0); ++NumOperands; } @@ -296,8 +295,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, (0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)))); DPPInst.addImm(Mod1->getImm()); ++NumOperands; - } else if (AMDGPU::getNamedOperandIdx(DPPOp, - AMDGPU::OpName::src1_modifiers) != -1) { + } else if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::src1_modifiers)) { DPPInst.addImm(0); ++NumOperands; } @@ -333,18 +331,16 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, } if (HasVOP3DPP) { auto *ClampOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::clamp); - if 
(ClampOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::clamp) != -1) { + if (ClampOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::clamp)) { DPPInst.addImm(ClampOpr->getImm()); } auto *VdstInOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst_in); if (VdstInOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::vdst_in) != -1) { + AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::vdst_in)) { DPPInst.add(*VdstInOpr); } auto *OmodOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::omod); - if (OmodOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::omod) != -1) { + if (OmodOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::omod)) { DPPInst.addImm(OmodOpr->getImm()); } // Validate OP_SEL has to be set to all 0 and OP_SEL_HI has to be set to @@ -357,7 +353,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, Fail = true; break; } - if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel) != -1) + if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel)) DPPInst.addImm(OpSel); } if (auto *OpSelHiOpr = @@ -371,17 +367,15 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI, Fail = true; break; } - if (AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::op_sel_hi) != -1) + if (AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::op_sel_hi)) DPPInst.addImm(OpSelHi); } auto *NegOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_lo); - if (NegOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_lo) != -1) { + if (NegOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_lo)) { DPPInst.addImm(NegOpr->getImm()); } auto *NegHiOpr = TII->getNamedOperand(OrigMI, AMDGPU::OpName::neg_hi); - if (NegHiOpr && - AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::neg_hi) != -1) { + if (NegHiOpr && AMDGPU::hasNamedOperand(DPPOp, AMDGPU::OpName::neg_hi)) { DPPInst.addImm(NegHiOpr->getImm()); } } diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 
81013db1f0034..cb1d5a6fdf003 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -919,8 +919,7 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { if (DstSel->getImm() == AMDGPU::SDWA::DWORD) return false; } else { - if ((AMDGPU::getNamedOperandIdx(MI.getOpcode(), - AMDGPU::OpName::op_sel) == -1) || + if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::op_sel) || !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers) ->getImm() & SISrcMods::DST_OP_SEL)) diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 2f38f7f65f80b..f9bed9a76c6fb 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -325,12 +325,14 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI, bool GCNDownwardRPTracker::advanceBeforeNext() { assert(MRI && "call reset first"); + if (!LastTrackedMI) + return NextMI == MBBEnd; - NextMI = skipDebugInstructionsForward(NextMI, MBBEnd); - if (NextMI == MBBEnd) - return false; + assert(NextMI == MBBEnd || !NextMI->isDebugInstr()); - SlotIndex SI = LIS.getInstructionIndex(*NextMI).getBaseIndex(); + SlotIndex SI = NextMI == MBBEnd + ? LIS.getInstructionIndex(*LastTrackedMI).getDeadSlot() + : LIS.getInstructionIndex(*NextMI).getBaseIndex(); assert(SI.isValid()); // Remove dead registers or mask bits. @@ -355,7 +357,9 @@ bool GCNDownwardRPTracker::advanceBeforeNext() { MaxPressure = max(MaxPressure, CurPressure); - return true; + LastTrackedMI = nullptr; + + return NextMI == MBBEnd; } void GCNDownwardRPTracker::advanceToNext() { @@ -379,9 +383,9 @@ void GCNDownwardRPTracker::advanceToNext() { } bool GCNDownwardRPTracker::advance() { - // If we have just called reset live set is actual. 
- if ((NextMI == MBBEnd) || (LastTrackedMI && !advanceBeforeNext())) + if (NextMI == MBBEnd) return false; + advanceBeforeNext(); advanceToNext(); return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index b6ad960a8a65f..72e18acc1b8e4 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -172,8 +172,8 @@ class GCNDownwardRPTracker : public GCNRPTracker { // Returns false if block is empty except debug values. bool reset(const MachineInstr &MI, const LiveRegSet *LiveRegs = nullptr); - // Move to the state right before the next MI. Returns false if reached - // end of the block. + // Move to the state right before the next MI or after the end of MBB. + // Returns true if reached end of the block. bool advanceBeforeNext(); // Move to the state at the MI, advanceBeforeNext has to be called first. diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 1577c1761aadd..25fcf422bfbe7 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -538,7 +538,6 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx, RPTracker.advanceToNext(); RPTracker.advance(MBB->end()); } - RPTracker.reset(*OnlySucc->begin(), &RPTracker.getLiveRegs()); RPTracker.advanceBeforeNext(); MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs(); } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 1ba05e765bc31..aa55ba5c1e291 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -297,12 +297,12 @@ uint64_t SIMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const { using namespace AMDGPU::VOP3PEncoding; using namespace AMDGPU::OpName; - if (AMDGPU::getNamedOperandIdx(Opcode, op_sel_hi) != -1) { - if
(AMDGPU::getNamedOperandIdx(Opcode, src2) != -1) + if (AMDGPU::hasNamedOperand(Opcode, op_sel_hi)) { + if (AMDGPU::hasNamedOperand(Opcode, src2)) return 0; - if (AMDGPU::getNamedOperandIdx(Opcode, src1) != -1) + if (AMDGPU::hasNamedOperand(Opcode, src1)) return OP_SEL_HI_2; - if (AMDGPU::getNamedOperandIdx(Opcode, src0) != -1) + if (AMDGPU::hasNamedOperand(Opcode, src0)) return OP_SEL_HI_1 | OP_SEL_HI_2; } return OP_SEL_HI_0 | OP_SEL_HI_1 | OP_SEL_HI_2; @@ -369,9 +369,7 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, return; // Do not print literals from SISrc Operands for insts with mandatory literals - int ImmLitIdx = - AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm); - if (ImmLitIdx != -1) + if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm)) return; // Check for additional literals diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 4897f481bf3c9..11108f6a999d4 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -62,17 +62,7 @@ struct FoldCandidate { bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; } - bool isCommuted() const { - return Commuted; - } - - bool needsShrink() const { - return ShrinkOpcode != -1; - } - - int getShrinkOpcode() const { - return ShrinkOpcode; - } + bool needsShrink() const { return ShrinkOpcode != -1; } }; class SIFoldOperands : public MachineFunctionPass { @@ -111,6 +101,8 @@ class SIFoldOperands : public MachineFunctionPass { bool tryFoldCndMask(MachineInstr &MI) const; bool tryFoldZeroHighBits(MachineInstr &MI) const; bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const; + bool tryFoldFoldableCopy(MachineInstr &MI, + MachineOperand *&CurrentKnownM0Val) const; const MachineOperand *isClamp(const MachineInstr &MI) const; bool tryFoldClamp(MachineInstr &MI); @@ -173,19 +165,17 @@ bool SIFoldOperands::frameIndexMayFold(const 
MachineInstr &UseMI, int OpNo, if (!OpToFold.isFI()) return false; + const unsigned Opc = UseMI.getOpcode(); if (TII->isMUBUF(UseMI)) - return OpNo == AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), - AMDGPU::OpName::vaddr); + return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); if (!TII->isFLATScratch(UseMI)) return false; - int SIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), - AMDGPU::OpName::saddr); + int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr); if (OpNo == SIdx) return true; - int VIdx = AMDGPU::getNamedOperandIdx(UseMI.getOpcode(), - AMDGPU::OpName::vaddr); + int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); return OpNo == VIdx && SIdx == -1; } @@ -198,11 +188,11 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { MachineOperand &Old = MI->getOperand(Fold.UseOpNo); assert(Old.isReg()); + + const uint64_t TSFlags = MI->getDesc().TSFlags; if (Fold.isImm()) { - if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked && - !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) && - (!ST->hasDOTOpSelHazard() || - !(MI->getDesc().TSFlags & SIInstrFlags::IsDOT)) && + if (TSFlags & SIInstrFlags::IsPacked && !(TSFlags & SIInstrFlags::IsMAI) && + (!ST->hasDOTOpSelHazard() || !(TSFlags & SIInstrFlags::IsDOT)) && AMDGPU::isFoldableLiteralV216(Fold.ImmToFold, ST->hasInv2PiInlineImm())) { // Set op_sel/op_sel_hi on this operand or bail out if op_sel is @@ -256,7 +246,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { return false; } - int Op32 = Fold.getShrinkOpcode(); + int Op32 = Fold.ShrinkOpcode; MachineOperand &Dst0 = MI->getOperand(0); MachineOperand &Dst1 = MI->getOperand(1); assert(Dst0.isDef() && Dst1.isDef()); @@ -285,7 +275,7 @@ bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const { MI->removeOperand(I); MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF)); - if (Fold.isCommuted()) + if (Fold.Commuted) TII->commuteInstruction(*Inst32, false); return true; } @@ -323,11 +313,7 @@ bool 
SIFoldOperands::updateOperand(FoldCandidate &Fold) const { static bool isUseMIInFoldList(ArrayRef FoldList, const MachineInstr *MI) { - for (auto Candidate : FoldList) { - if (Candidate.UseMI == MI) - return true; - } - return false; + return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; }); } static void appendFoldCandidate(SmallVectorImpl &FoldList, @@ -486,7 +472,6 @@ bool SIFoldOperands::isUseSafeToFold(const MachineInstr &MI, } return true; - //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg()); } // Find a def of the UseReg, check if it is a reg_sequence and find initializers @@ -606,10 +591,9 @@ void SIFoldOperands::foldOperand( return; // FIXME: Fold operands with subregs. - if (UseOp.isReg() && OpToFold.isReg()) { - if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister) - return; - } + if (UseOp.isReg() && OpToFold.isReg() && + (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)) + return; // Special case for REG_SEQUENCE: We can't fold literals into // REG_SEQUENCE instructions, so we have to fold them into the @@ -659,12 +643,11 @@ void SIFoldOperands::foldOperand( // safe to fold the addressing mode, even pre-GFX9. 
UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex()); + const unsigned Opc = UseMI->getOpcode(); if (TII->isFLATScratch(*UseMI) && - AMDGPU::getNamedOperandIdx(UseMI->getOpcode(), - AMDGPU::OpName::vaddr) != -1 && - AMDGPU::getNamedOperandIdx(UseMI->getOpcode(), - AMDGPU::OpName::saddr) == -1) { - unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(UseMI->getOpcode()); + AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) && + !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::saddr)) { + unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc); UseMI->setDesc(TII->get(NewOpc)); } @@ -700,8 +683,10 @@ void SIFoldOperands::foldOperand( Use.getParent()->getOperandNo(&Use), &UseMI->getOperand(1)); } + for (auto &F : CopyUses) { - foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, CopiesToReplace); + foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList, + CopiesToReplace); } } @@ -826,15 +811,15 @@ void SIFoldOperands::foldOperand( if (Size != 4) return; - if (TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) && - TRI->isVGPR(*MRI, UseMI->getOperand(1).getReg())) + + Register Reg0 = UseMI->getOperand(0).getReg(); + Register Reg1 = UseMI->getOperand(1).getReg(); + if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1)) UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64)); - else if (TRI->isVGPR(*MRI, UseMI->getOperand(0).getReg()) && - TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg())) + else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1)) UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64)); - else if (ST->hasGFX90AInsts() && - TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg()) && - TRI->isAGPR(*MRI, UseMI->getOperand(1).getReg())) + else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) && + TRI->isAGPR(*MRI, Reg1)) UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32)); return; } @@ -1018,10 +1003,12 @@ static unsigned getMovOpc(bool IsScalar) { return IsScalar ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; } -/// Remove any leftover implicit operands from mutating the instruction. e.g. -/// if we replace an s_and_b32 with a copy, we don't need the implicit scc def -/// anymore. -static void stripExtraCopyOperands(MachineInstr &MI) { +static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { + MI.setDesc(NewDesc); + + // Remove any leftover implicit operands from mutating the instruction. e.g. + // if we replace an s_and_b32 with a copy, we don't need the implicit scc def + // anymore. const MCInstrDesc &Desc = MI.getDesc(); unsigned NumOps = Desc.getNumOperands() + Desc.getNumImplicitUses() + @@ -1031,24 +1018,18 @@ static void stripExtraCopyOperands(MachineInstr &MI) { MI.removeOperand(I); } -static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) { - MI.setDesc(NewDesc); - stripExtraCopyOperands(MI); -} - MachineOperand * SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const { - if (Op.isReg()) { - // If this has a subregister, it obviously is a register source. - if (Op.getSubReg() != AMDGPU::NoSubRegister || !Op.getReg().isVirtual()) - return &Op; - - MachineInstr *Def = MRI->getVRegDef(Op.getReg()); - if (Def && Def->isMoveImmediate()) { - MachineOperand &ImmSrc = Def->getOperand(1); - if (ImmSrc.isImm()) - return &ImmSrc; - } + // If this has a subregister, it obviously is a register source. 
+ if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister || + !Op.getReg().isVirtual()) + return &Op; + + MachineInstr *Def = MRI->getVRegDef(Op.getReg()); + if (Def && Def->isMoveImmediate()) { + MachineOperand &ImmSrc = Def->getOperand(1); + if (ImmSrc.isImm()) + return &ImmSrc; } return &Op; @@ -1125,9 +1106,8 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const { return true; } - if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 || - MI->getOpcode() == AMDGPU::V_AND_B32_e32 || - MI->getOpcode() == AMDGPU::S_AND_B32) { + if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 || + Opc == AMDGPU::S_AND_B32) { if (Src1Val == 0) { // y = and x, 0 => y = v_mov_b32 0 MI->removeOperand(Src0Idx); @@ -1136,16 +1116,14 @@ bool SIFoldOperands::tryConstantFoldOp(MachineInstr *MI) const { // y = and x, -1 => y = copy x MI->removeOperand(Src1Idx); mutateCopyOp(*MI, TII->get(AMDGPU::COPY)); - stripExtraCopyOperands(*MI); } else return false; return true; } - if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 || - MI->getOpcode() == AMDGPU::V_XOR_B32_e32 || - MI->getOpcode() == AMDGPU::S_XOR_B32) { + if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 || + Opc == AMDGPU::S_XOR_B32) { if (Src1Val == 0) { // y = xor x, 0 => y = copy x MI->removeOperand(Src1Idx); @@ -1208,14 +1186,13 @@ bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const { Register Src1 = MI.getOperand(2).getReg(); MachineInstr *SrcDef = MRI->getVRegDef(Src1); - if (ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) { - Register Dst = MI.getOperand(0).getReg(); - MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg()); - MI.eraseFromParent(); - return true; - } + if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) + return false; - return false; + Register Dst = MI.getOperand(0).getReg(); + MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg()); + MI.eraseFromParent(); + return true; } bool SIFoldOperands::foldInstOperand(MachineInstr &MI, @@ -1284,7 +1261,7 @@ bool 
SIFoldOperands::foldInstOperand(MachineInstr &MI, LLVM_DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << static_cast(Fold.UseOpNo) << " of " << *Fold.UseMI); - } else if (Fold.isCommuted()) { + } else if (Fold.Commuted) { // Restoring instruction's original operand order if fold has failed. TII->commuteInstruction(*Fold.UseMI, false); } @@ -1292,6 +1269,73 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI, return true; } +bool SIFoldOperands::tryFoldFoldableCopy( + MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const { + // Specially track simple redefs of m0 to the same value in a block, so we + // can erase the later ones. + if (MI.getOperand(0).getReg() == AMDGPU::M0) { + MachineOperand &NewM0Val = MI.getOperand(1); + if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { + MI.eraseFromParent(); + return true; + } + + // We aren't tracking other physical registers + CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) + ? nullptr + : &NewM0Val; + return false; + } + + MachineOperand &OpToFold = MI.getOperand(1); + bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal(); + + // FIXME: We could also be folding things like TargetIndexes. + if (!FoldingImm && !OpToFold.isReg()) + return false; + + if (OpToFold.isReg() && !OpToFold.getReg().isVirtual()) + return false; + + // Prevent folding operands backwards in the function. For example, + // the COPY opcode must not be replaced by 1 in this example: + // + // %3 = COPY %vgpr0; VGPR_32:%3 + // ... + // %vgpr0 = V_MOV_B32_e32 1, implicit %exec + if (!MI.getOperand(0).getReg().isVirtual()) + return false; + + bool Changed = foldInstOperand(MI, OpToFold); + + // If we managed to fold all uses of this copy then we might as well + // delete it now. + // The only reason we need to follow chains of copies here is that + // tryFoldRegSequence looks forward through copies before folding a + // REG_SEQUENCE into its eventual users. 
+ auto *InstToErase = &MI; + while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { + auto &SrcOp = InstToErase->getOperand(1); + auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register(); + InstToErase->eraseFromParent(); + Changed = true; + InstToErase = nullptr; + if (!SrcReg || SrcReg.isPhysical()) + break; + InstToErase = MRI->getVRegDef(SrcReg); + if (!InstToErase || !TII->isFoldableCopy(*InstToErase)) + break; + } + + if (InstToErase && InstToErase->isRegSequence() && + MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { + InstToErase->eraseFromParent(); + Changed = true; + } + + return Changed; +} + // Clamp patterns are canonically selected to v_max_* instructions, so only // handle them. const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { @@ -1666,9 +1710,9 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) { SmallVector Users; SmallVector MoveRegs; - for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg)) { + for (const MachineInstr &I : MRI->use_nodbg_instructions(DefReg)) Users.push_back(&I); - } + if (Users.empty()) return false; @@ -1681,9 +1725,8 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) { if (TRI->isAGPR(*MRI, DstReg)) continue; MoveRegs.push_back(DstReg); - for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg)) { + for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg)) Users.push_back(&U); - } } const TargetRegisterClass *RC = MRI->getRegClass(DefReg); @@ -1746,82 +1789,22 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { continue; } - if (!TII->isFoldableCopy(MI)) { - // Saw an unknown clobber of m0, so we no longer know what it is. - if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) - CurrentKnownM0Val = nullptr; - - // TODO: Omod might be OK if there is NSZ only on the source - // instruction, and not the omod multiply. 
- if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || - !tryFoldOMod(MI)) - Changed |= tryFoldClamp(MI); - + if (TII->isFoldableCopy(MI)) { + Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val); continue; } - // Specially track simple redefs of m0 to the same value in a block, so we - // can erase the later ones. - if (MI.getOperand(0).getReg() == AMDGPU::M0) { - MachineOperand &NewM0Val = MI.getOperand(1); - if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) { - MI.eraseFromParent(); - Changed = true; - continue; - } + // Saw an unknown clobber of m0, so we no longer know what it is. + if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) + CurrentKnownM0Val = nullptr; - // We aren't tracking other physical registers - CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ? - nullptr : &NewM0Val; - continue; - } - - MachineOperand &OpToFold = MI.getOperand(1); - bool FoldingImm = - OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal(); - - // FIXME: We could also be folding things like TargetIndexes. - if (!FoldingImm && !OpToFold.isReg()) - continue; - - if (OpToFold.isReg() && !OpToFold.getReg().isVirtual()) - continue; - - // Prevent folding operands backwards in the function. For example, - // the COPY opcode must not be replaced by 1 in this example: - // - // %3 = COPY %vgpr0; VGPR_32:%3 - // ... - // %vgpr0 = V_MOV_B32_e32 1, implicit %exec - if (!MI.getOperand(0).getReg().isVirtual()) - continue; - - Changed |= foldInstOperand(MI, OpToFold); - - // If we managed to fold all uses of this copy then we might as well - // delete it now. - // The only reason we need to follow chains of copies here is that - // tryFoldRegSequence looks forward through copies before folding a - // REG_SEQUENCE into its eventual users. - auto *InstToErase = &MI; - while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { - auto &SrcOp = InstToErase->getOperand(1); - auto SrcReg = SrcOp.isReg() ? 
SrcOp.getReg() : Register(); - InstToErase->eraseFromParent(); - Changed = true; - InstToErase = nullptr; - if (!SrcReg || SrcReg.isPhysical()) - break; - InstToErase = MRI->getVRegDef(SrcReg); - if (!InstToErase || !TII->isFoldableCopy(*InstToErase)) - break; - } - if (InstToErase && InstToErase->isRegSequence() && - MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) { - InstToErase->eraseFromParent(); - Changed = true; - } + // TODO: Omod might be OK if there is NSZ only on the source + // instruction, and not the omod multiply. + if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || + !tryFoldOMod(MI)) + Changed |= tryFoldClamp(MI); } } + return Changed; } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index fe5090f9c01c2..c785cfdaaef82 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1188,6 +1188,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( // correct register value. 
But not sure the register value alone is for (MachineInstr &MI : MBB) { if (MI.isDebugValue() && MI.getOperand(0).isFI() && + !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) && SpillFIs[MI.getOperand(0).getIndex()]) { MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 24ef9fdb7b8cf..b6243c986bd18 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -30,9 +30,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsR600.h" +#include "llvm/IR/ModRef.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" @@ -958,7 +960,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, AMDGPU::lookupRsrcIntrinsic(IntrID)) { AttributeList Attr = Intrinsic::getAttributes(CI.getContext(), (Intrinsic::ID)IntrID); - if (Attr.hasFnAttr(Attribute::ReadNone)) + MemoryEffects ME = Attr.getMemoryEffects(); + if (ME.doesNotAccessMemory()) return false; SIMachineFunctionInfo *MFI = MF.getInfo(); @@ -974,7 +977,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } Info.flags |= MachineMemOperand::MODereferenceable; - if (Attr.hasFnAttr(Attribute::ReadOnly)) { + if (ME.onlyReadsMemory()) { unsigned DMaskLanes = 4; if (RsrcIntr->IsImage) { @@ -998,7 +1001,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, // FIXME: What does alignment mean for an image? 
Info.opc = ISD::INTRINSIC_W_CHAIN; Info.flags |= MachineMemOperand::MOLoad; - } else if (Attr.hasFnAttr(Attribute::WriteOnly)) { + } else if (ME.onlyWritesMemory()) { Info.opc = ISD::INTRINSIC_VOID; Type *DataTy = CI.getArgOperand(0)->getType(); @@ -9988,8 +9991,11 @@ SDValue SITargetLowering::performRcpCombine(SDNode *N, EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); - if (N0.isUndef()) - return N0; + if (N0.isUndef()) { + return DCI.DAG.getConstantFP( + APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT)), SDLoc(N), + VT); + } if (VT == MVT::f32 && (N0.getOpcode() == ISD::UINT_TO_FP || N0.getOpcode() == ISD::SINT_TO_FP)) { @@ -10375,7 +10381,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine( // If it's free to do so, push canonicalizes further up the source, which may // find a canonical source. // - // TODO: More opcodes. Note this is unsafe for the the _ieee minnum/maxnum for + // TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for // sNaNs. if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) { auto *CRHS = dyn_cast(N0.getOperand(1)); @@ -11865,7 +11871,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() && !TII->isGather4(Opcode) && - AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) != -1) { + AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::dmask)) { return adjustWritemask(Node, DAG); } @@ -12864,6 +12870,19 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { if (Ty->isDoubleTy() && Subtarget->hasGFX90AInsts()) return ReportUnsafeHWInst(AtomicExpansionKind::None); + // If it is in flat address space, and the type is float, we will try to + // expand it, if the target supports global and lds atomic fadd. The + // reason we need that is, in the expansion, we emit the check of address + // space. 
If it is in global address space, we emit the global atomic + // fadd; if it is in shared address space, we emit the LDS atomic fadd. + if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy() && + Subtarget->hasLDSFPAtomicAdd()) { + if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts()) + return AtomicExpansionKind::Expand; + if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts()) + return AtomicExpansionKind::Expand; + } + return AtomicExpansionKind::CmpXChg; } @@ -13064,3 +13083,140 @@ bool SITargetLowering::checkForPhysRegDependency( } return false; } + +void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const { + assert(Subtarget->hasAtomicFaddInsts() && + "target should have atomic fadd instructions"); + assert(AI->getType()->isFloatTy() && + AI->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS && + "generic atomicrmw expansion only supports FP32 operand in flat " + "address space"); + assert(AI->getOperation() == AtomicRMWInst::FAdd && + "only fadd is supported for now"); + + // Given: atomicrmw fadd float* %addr, float %val ordering + // + // With this expansion we produce the following code: + // [...] 
+ // %int8ptr = bitcast float* %addr to i8* + // br label %atomicrmw.check.shared + // + // atomicrmw.check.shared: + // %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %int8ptr) + // br i1 %is.shared, label %atomicrmw.shared, label %atomicrmw.check.private + // + // atomicrmw.shared: + // %cast.shared = addrspacecast float* %addr to float addrspace(3)* + // %loaded.shared = atomicrmw fadd float addrspace(3)* %cast.shared, + // float %val ordering + // br label %atomicrmw.phi + // + // atomicrmw.check.private: + // %is.private = call i1 @llvm.amdgcn.is.private(i8* %int8ptr) + // br i1 %is.private, label %atomicrmw.private, label %atomicrmw.global + // + // atomicrmw.private: + // %cast.private = addrspacecast float* %addr to float addrspace(5)* + // %loaded.private = load float, float addrspace(5)* %cast.private + // %val.new = fadd float %loaded.private, %val + // store float %val.new, float addrspace(5)* %cast.private + // br label %atomicrmw.phi + // + // atomicrmw.global: + // %cast.global = addrspacecast float* %addr to float addrspace(1)* + // %loaded.global = atomicrmw fadd float addrspace(1)* %cast.global, + // float %val ordering + // br label %atomicrmw.phi + // + // atomicrmw.phi: + // %loaded.phi = phi float [ %loaded.shared, %atomicrmw.shared ], + // [ %loaded.private, %atomicrmw.private ], + // [ %loaded.global, %atomicrmw.global ] + // br label %atomicrmw.end + // + // atomicrmw.end: + // [...] 
+ + IRBuilder<> Builder(AI); + LLVMContext &Ctx = Builder.getContext(); + + BasicBlock *BB = Builder.GetInsertBlock(); + Function *F = BB->getParent(); + BasicBlock *ExitBB = + BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); + BasicBlock *CheckSharedBB = + BasicBlock::Create(Ctx, "atomicrmw.check.shared", F, ExitBB); + BasicBlock *SharedBB = BasicBlock::Create(Ctx, "atomicrmw.shared", F, ExitBB); + BasicBlock *CheckPrivateBB = + BasicBlock::Create(Ctx, "atomicrmw.check.private", F, ExitBB); + BasicBlock *PrivateBB = + BasicBlock::Create(Ctx, "atomicrmw.private", F, ExitBB); + BasicBlock *GlobalBB = BasicBlock::Create(Ctx, "atomicrmw.global", F, ExitBB); + BasicBlock *PhiBB = BasicBlock::Create(Ctx, "atomicrmw.phi", F, ExitBB); + + Value *Val = AI->getValOperand(); + Type *ValTy = Val->getType(); + Value *Addr = AI->getPointerOperand(); + PointerType *PtrTy = cast(Addr->getType()); + + auto CreateNewAtomicRMW = [AI](IRBuilder<> &Builder, Value *Addr, + Value *Val) -> Value * { + AtomicRMWInst *OldVal = + Builder.CreateAtomicRMW(AI->getOperation(), Addr, Val, AI->getAlign(), + AI->getOrdering(), AI->getSyncScopeID()); + SmallVector> MDs; + AI->getAllMetadata(MDs); + for (auto &P : MDs) + OldVal->setMetadata(P.first, P.second); + return OldVal; + }; + + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + Value *Int8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy()); + Builder.CreateBr(CheckSharedBB); + + Builder.SetInsertPoint(CheckSharedBB); + CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {}, + {Int8Ptr}, nullptr, "is.shared"); + Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB); + + Builder.SetInsertPoint(SharedBB); + Value *CastToLocal = Builder.CreateAddrSpaceCast( + Addr, + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::LOCAL_ADDRESS)); + Value *LoadedShared = CreateNewAtomicRMW(Builder, CastToLocal, Val); + Builder.CreateBr(PhiBB); + + Builder.SetInsertPoint(CheckPrivateBB); + 
CallInst *IsPrivate = Builder.CreateIntrinsic( + Intrinsic::amdgcn_is_private, {}, {Int8Ptr}, nullptr, "is.private"); + Builder.CreateCondBr(IsPrivate, PrivateBB, GlobalBB); + + Builder.SetInsertPoint(PrivateBB); + Value *CastToPrivate = Builder.CreateAddrSpaceCast( + Addr, + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::PRIVATE_ADDRESS)); + Value *LoadedPrivate = + Builder.CreateLoad(ValTy, CastToPrivate, "loaded.private"); + Value *NewVal = Builder.CreateFAdd(LoadedPrivate, Val, "val.new"); + Builder.CreateStore(NewVal, CastToPrivate); + Builder.CreateBr(PhiBB); + + Builder.SetInsertPoint(GlobalBB); + Value *CastToGlobal = Builder.CreateAddrSpaceCast( + Addr, + PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::GLOBAL_ADDRESS)); + Value *LoadedGlobal = CreateNewAtomicRMW(Builder, CastToGlobal, Val); + Builder.CreateBr(PhiBB); + + Builder.SetInsertPoint(PhiBB); + PHINode *Loaded = Builder.CreatePHI(ValTy, 3, "loaded.phi"); + Loaded->addIncoming(LoadedShared, SharedBB); + Loaded->addIncoming(LoadedPrivate, PrivateBB); + Loaded->addIncoming(LoadedGlobal, GlobalBB); + Builder.CreateBr(ExitBB); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); +} diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 9e8ff565fe6bd..b47730f5d3b27 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -493,6 +493,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override; AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override; + void emitExpandAtomicRMW(AtomicRMWInst *AI) const override; const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent) const override; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 5e0ae4c2581f6..a12fb3abdfedc 100644 --- 
a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -216,7 +216,7 @@ class WaitcntBrackets { } // Mapping from event to counter. - InstCounterType eventCounter(WaitEventType E) { + InstCounterType eventCounter(WaitEventType E) const { for (auto T : inst_counter_types()) { if (WaitEventMaskForInst[T] & (1 << E)) return T; @@ -471,7 +471,7 @@ class SIInsertWaitcnts : public MachineFunctionPass { bool applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets, MachineInstr &OldWaitcntInstr, AMDGPU::Waitcnt &Wait, - MachineBasicBlock::instr_iterator It); + MachineBasicBlock::instr_iterator It) const; }; } // end anonymous namespace @@ -564,15 +564,13 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo *TII, } if (Inst.mayStore()) { - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::data0) != -1) { + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data0)) { setExpScore( &Inst, TII, TRI, MRI, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0), CurrScore); } - if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), - AMDGPU::OpName::data1) != -1) { + if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::data1)) { setExpScore(&Inst, TII, TRI, MRI, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data1), @@ -850,7 +848,7 @@ FunctionPass *llvm::createSIInsertWaitcntsPass() { /// preexisting waitcnt are required for correctness. 
bool SIInsertWaitcnts::applyPreexistingWaitcnt( WaitcntBrackets &ScoreBrackets, MachineInstr &OldWaitcntInstr, - AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) { + AMDGPU::Waitcnt &Wait, MachineBasicBlock::instr_iterator It) const { bool Modified = false; MachineInstr *WaitcntInstr = nullptr; MachineInstr *WaitcntVsCntInstr = nullptr; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 27e0d56c76ec4..234e6c3e796aa 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -213,8 +213,8 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, if (isSMRD(Opc0) && isSMRD(Opc1)) { // Skip time and cache invalidation instructions. - if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::sbase) == -1 || - AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1) + if (!AMDGPU::hasNamedOperand(Opc0, AMDGPU::OpName::sbase) || + !AMDGPU::hasNamedOperand(Opc1, AMDGPU::OpName::sbase)) return false; unsigned NumOps = getNumOperandsNoGlue(Load0); @@ -3797,8 +3797,7 @@ bool SIInstrInfo::hasModifiers(unsigned Opcode) const { // The src0_modifier operand is present on all instructions // that have modifiers. - return AMDGPU::getNamedOperandIdx(Opcode, - AMDGPU::OpName::src0_modifiers) != -1; + return AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers); } bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, @@ -3891,10 +3890,10 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI, // Add the dst operand if the 32-bit encoding also has an explicit $vdst. // For VOPC instructions, this is replaced by an implicit def of vcc. 
- if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst) != -1) { + if (AMDGPU::hasNamedOperand(Op32, AMDGPU::OpName::vdst)) { // dst Inst32.add(MI.getOperand(0)); - } else if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::sdst) != -1) { + } else if (AMDGPU::hasNamedOperand(Op32, AMDGPU::OpName::sdst)) { // VOPCX instructions won't be writing to an explicit dst, so this should // not fail for these instructions. assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) || @@ -4852,9 +4851,8 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID, (TID.TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata); if (DataIdx != -1) { - IsAllocatable = VDstIdx != -1 || - AMDGPU::getNamedOperandIdx(TID.Opcode, - AMDGPU::OpName::data1) != -1; + IsAllocatable = VDstIdx != -1 || AMDGPU::hasNamedOperand( + TID.Opcode, AMDGPU::OpName::data1); } } return adjustAllocatableRegClass(ST, RI, MF.getRegInfo(), TID, RegClass, @@ -6362,7 +6360,6 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, continue; } - if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { // We cannot move this instruction to the VALU, so we should try to // legalize its operands instead. @@ -6372,43 +6369,90 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, continue; } + // Handle converting generic instructions like COPY-to-SGPR into + // COPY-to-VGPR. + if (NewOpcode == Opcode) { + Register DstReg = Inst.getOperand(0).getReg(); + const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); + + if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && + NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { + // Instead of creating a copy where src and dst are the same register + // class, we just replace all uses of dst with src. These kinds of + // copies interfere with the heuristics MachineSink uses to decide + // whether or not to split a critical edge. 
Since the pass assumes + // that copies will end up as machine instructions and not be + // eliminated. + addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); + MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); + MRI.clearKillFlags(Inst.getOperand(1).getReg()); + Inst.getOperand(0).setReg(DstReg); + + // Make sure we don't leave around a dead VGPR->SGPR copy. Normally + // these are deleted later, but at -O0 it would leave a suspicious + // looking illegal copy of an undef register. + for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) + Inst.removeOperand(I); + Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); + continue; + } + + Register NewDstReg = MRI.createVirtualRegister(NewDstRC); + MRI.replaceRegWith(DstReg, NewDstReg); + legalizeOperands(Inst, MDT); + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); + continue; + } + // Use the new VALU Opcode. - const MCInstrDesc &NewDesc = get(NewOpcode); - Inst.setDesc(NewDesc); + auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode)) + .setMIFlags(Inst.getFlags()); + for (const MachineOperand &Op : Inst.explicit_operands()) + NewInstr->addOperand(Op); // Remove any references to SCC. Vector instructions can't read from it, and // We're just about to add the implicit use / defs of VCC, and we don't want // both. - for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { - MachineOperand &Op = Inst.getOperand(i); - if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { + for (MachineOperand &Op : Inst.implicit_operands()) { + if (Op.getReg() == AMDGPU::SCC) { // Only propagate through live-def of SCC. 
if (Op.isDef() && !Op.isDead()) addSCCDefUsersToVALUWorklist(Op, Inst, Worklist); if (Op.isUse()) - addSCCDefsToVALUWorklist(Op, Worklist); - Inst.removeOperand(i); + addSCCDefsToVALUWorklist(NewInstr, Worklist); } } + Inst.eraseFromParent(); + + Register NewDstReg; + if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) { + Register DstReg = NewInstr->getOperand(0).getReg(); + assert(DstReg.isVirtual()); + + // Update the destination register class. + const TargetRegisterClass *NewDstRC = + getDestEquivalentVGPRClass(*NewInstr); + assert(NewDstRC); + + NewDstReg = MRI.createVirtualRegister(NewDstRC); + MRI.replaceRegWith(DstReg, NewDstReg); + } + if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { // We are converting these to a BFE, so we need to add the missing // operands for the size and offset. unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; - Inst.addOperand(MachineOperand::CreateImm(0)); - Inst.addOperand(MachineOperand::CreateImm(Size)); - + NewInstr.addImm(0); + NewInstr.addImm(Size); } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { // The VALU version adds the second operand to the result, so insert an // extra 0 operand. - Inst.addOperand(MachineOperand::CreateImm(0)); + NewInstr.addImm(0); } - Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); - fixImplicitOperands(Inst); - if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { - const MachineOperand &OffsetWidthOp = Inst.getOperand(2); + const MachineOperand &OffsetWidthOp = NewInstr->getOperand(2); // If we need to move this to VGPRs, we need to unpack the second operand // back into the 2 separate ones for bit offset and width. assert(OffsetWidthOp.isImm() && @@ -6417,56 +6461,20 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst, uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. - Inst.removeOperand(2); // Remove old immediate. 
- Inst.addOperand(MachineOperand::CreateImm(Offset)); - Inst.addOperand(MachineOperand::CreateImm(BitWidth)); + NewInstr->removeOperand(2); + NewInstr.addImm(Offset); + NewInstr.addImm(BitWidth); } - bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); - Register NewDstReg; - if (HasDst) { - Register DstReg = Inst.getOperand(0).getReg(); - if (DstReg.isPhysical()) - continue; - - // Update the destination register class. - const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); - if (!NewDstRC) - continue; - - if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() && - NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) { - // Instead of creating a copy where src and dst are the same register - // class, we just replace all uses of dst with src. These kinds of - // copies interfere with the heuristics MachineSink uses to decide - // whether or not to split a critical edge. Since the pass assumes - // that copies will end up as machine instructions and not be - // eliminated. - addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist); - MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg()); - MRI.clearKillFlags(Inst.getOperand(1).getReg()); - Inst.getOperand(0).setReg(DstReg); - - // Make sure we don't leave around a dead VGPR->SGPR copy. Normally - // these are deleted later, but at -O0 it would leave a suspicious - // looking illegal copy of an undef register. 
- for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I) - Inst.removeOperand(I); - Inst.setDesc(get(AMDGPU::IMPLICIT_DEF)); - continue; - } - - NewDstReg = MRI.createVirtualRegister(NewDstRC); - MRI.replaceRegWith(DstReg, NewDstReg); - } + fixImplicitOperands(*NewInstr); // Legalize the operands - CreatedBBTmp = legalizeOperands(Inst, MDT); + CreatedBBTmp = legalizeOperands(*NewInstr, MDT); if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp) CreatedBB = CreatedBBTmp; - if (HasDst) - addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); + if (NewDstReg) + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); } return CreatedBB; } @@ -7229,11 +7237,8 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op, // SCC must be changed to an instruction that defines VCC. This function makes // sure that the instruction that defines SCC is added to the moveToVALU // worklist. -void SIInstrInfo::addSCCDefsToVALUWorklist(MachineOperand &Op, +void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, SetVectorType &Worklist) const { - assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isUse()); - - MachineInstr *SCCUseInst = Op.getParent(); // Look for a preceding instruction that either defines VCC or SCC. If VCC // then there is nothing to do because the defining instruction has been // converted to a VALU already. 
If SCC then that instruction needs to be diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index b80838c393fcc..bf4330ed00683 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -130,7 +130,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { MachineInstr &SCCDefInst, SetVectorType &Worklist, Register NewCond = Register()) const; - void addSCCDefsToVALUWorklist(MachineOperand &Op, + void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst, SetVectorType &Worklist) const; const TargetRegisterClass * @@ -723,7 +723,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { } /// \returns true if this is an s_store_dword* instruction. This is more - /// specific than than isSMEM && mayStore. + /// specific than isSMEM && mayStore. static bool isScalarStore(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE; } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 79f2826aa5cec..0eefce86f60ab 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -412,8 +412,8 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { } if (TII.isMIMG(Opc)) { // Ignore instructions encoded without vaddr. - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1 && - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0) == -1) + if (!AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) && + !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr0)) return UNKNOWN; // Ignore BVH instructions if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH) @@ -1385,7 +1385,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair( New.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)); // For convenience, when SGPR_IMM buffer loads are merged into a // zero-offset load, we generate its SGPR variant. 
- if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset) != -1) + if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::offset)) New.addImm(MergedOffset); New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired)); diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 251ac626e21cb..345395db13fb4 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -319,6 +319,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { // adequate to lower the DIExpression. It should be worked out later. for (MachineInstr &MI : MBB) { if (MI.isDebugValue() && MI.getOperand(0).isFI() && + !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) && SpillFIs[MI.getOperand(0).getIndex()]) { MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/); } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 99967507a200f..80ce18e55c499 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -106,7 +106,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (ST.hasGFX90AInsts() && ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() && - !mayUseAGPRs(MF)) + !mayUseAGPRs(F)) MayNeedAGPRs = false; // We will select all MAI with VGPR operands. 
} @@ -664,8 +664,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields( return false; } -bool SIMachineFunctionInfo::mayUseAGPRs(const MachineFunction &MF) const { - for (const BasicBlock &BB : MF.getFunction()) { +bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const { + for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { const auto *CB = dyn_cast(&I); if (!CB) diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index aff679deb069d..19b74ce319fa1 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -358,7 +358,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // as the input registers. Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG; - // This is the the unswizzled offset from the current dispatch's scratch wave + // This is the unswizzled offset from the current dispatch's scratch wave // base to the beginning of the current function's frame. Register FrameOffsetReg = AMDGPU::FP_REG; @@ -462,7 +462,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // VGPR used for SGPR spills Register VGPR; - // If the VGPR is is used for SGPR spills in a non-entrypoint function, the + // If the VGPR is used for SGPR spills in a non-entrypoint function, the // stack slot used to save/restore it in the prolog/epilog. Optional FI; @@ -991,7 +991,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // \returns true if a function has a use of AGPRs via inline asm or // has a call which may use it. - bool mayUseAGPRs(const MachineFunction &MF) const; + bool mayUseAGPRs(const Function &F) const; // \returns true if a function needs or may need AGPRs. 
bool usesAGPRs(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index aed84437b8908..85de3a5484111 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -226,7 +226,7 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) { auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot()); assert(DefSegment != SelLI->end() && "No live interval segment covering definition?"); - for (auto I = DefSegment; I != SelLI->end(); ++I) { + for (auto I = DefSegment; I != SelLI->end() && I->start <= AndIdx; ++I) { SlotIndex Start = I->start < SelIdx.getRegSlot() ? SelIdx.getRegSlot() : I->start; SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ? diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp index 2ae3157bab490..ae2c10116de85 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp @@ -36,7 +36,7 @@ /// the instructions in bb.then will only overwrite lanes that will never be /// accessed in bb.else. /// -/// This pass aims to to tell register allocator that %a is in-fact dead, +/// This pass aims to tell register allocator that %a is in-fact dead, /// through inserting a phi-node in bb.flow saying that %a is undef when coming /// from bb.then, and then replace the uses in the bb.else with the result of /// newly inserted phi. 
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index e768a2f3e1a5d..b21dbb7626e6a 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -1002,24 +1002,21 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Copy dst, if it is present in original then should also be present in SDWA MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst); if (Dst) { - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::vdst)); SDWAInst.add(*Dst); } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) { - assert(Dst && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + assert(Dst && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::sdst)); SDWAInst.add(*Dst); } else { - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::sdst)); SDWAInst.addReg(TRI->getVCC(), RegState::Define); } // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and // src0_modifiers (except for v_nop_sdwa, but it can't get here) MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); - assert( - Src0 && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1); + assert(Src0 && AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0) && + AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_modifiers)); if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) SDWAInst.addImm(Mod->getImm()); else @@ -1029,9 +1026,8 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Copy src1 if present, initialize src1_modifiers. 
MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (Src1) { - assert( - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 && - AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1) && + AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_modifiers)); if (auto *Mod = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)) SDWAInst.addImm(Mod->getImm()); else @@ -1050,7 +1046,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy clamp if present, initialize otherwise - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::clamp)); MachineOperand *Clamp = TII->getNamedOperand(MI, AMDGPU::OpName::clamp); if (Clamp) { SDWAInst.add(*Clamp); @@ -1059,7 +1055,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy omod if present, initialize otherwise if needed - if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) { + if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::omod)) { MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod); if (OMod) { SDWAInst.add(*OMod); @@ -1069,7 +1065,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy dst_sel if present, initialize otherwise if needed - if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) { + if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_sel)) { MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel); if (DstSel) { SDWAInst.add(*DstSel); @@ -1079,7 +1075,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy dst_unused if present, initialize otherwise if needed - if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) { + if (AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::dst_unused)) { MachineOperand *DstUnused = 
TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused); if (DstUnused) { SDWAInst.add(*DstUnused); @@ -1089,7 +1085,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, } // Copy src0_sel if present, initialize otherwise - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src0_sel)); MachineOperand *Src0Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel); if (Src0Sel) { SDWAInst.add(*Src0Sel); @@ -1099,7 +1095,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Copy src1_sel if present, initialize otherwise if needed if (Src1) { - assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1); + assert(AMDGPU::hasNamedOperand(SDWAOpcode, AMDGPU::OpName::src1_sel)); MachineOperand *Src1Sel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel); if (Src1Sel) { SDWAInst.add(*Src1Sel); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 99292129257f2..bb656329b3d41 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1235,10 +1235,9 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize) { bool IsStore = TII->get(LoadStoreOp).mayStore(); - bool HasVAddr = AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) != -1; + bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr); bool UseST = - !HasVAddr && - AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0; + !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr); switch (EltSize) { case 4: @@ -2140,7 +2139,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, if (!Offset) { unsigned Opc = MI->getOpcode(); int NewOpc = -1; - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) != -1) { + if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) { NewOpc = 
AMDGPU::getFlatScratchInstSVfromSVS(Opc); } else if (ST.hasFlatScratchSTMode()) { // On GFX10 we have ST mode to use no registers for an address. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index dda515595e4d9..80e4dada6b36a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -430,7 +430,7 @@ unsigned getVOPDOpcode(unsigned Opc) { } bool isVOPD(unsigned Opc) { - return AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0X) != -1; + return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X); } bool isTrue16Inst(unsigned Opc) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index a8642a0d1da85..778987cb03e76 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -299,6 +299,11 @@ unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs, LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx); +LLVM_READONLY +inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) { + return getNamedOperandIdx(Opcode, NamedIdx) != -1; +} + LLVM_READONLY int getSOPPWithRelaxation(uint16_t Opcode); diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td index c63fbbc241d90..71de20223e9f6 100644 --- a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td @@ -63,6 +63,10 @@ def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> { let HasOpSel = 0; let HasModifiers = 1; + let Src0Mod = FPVRegInputMods; + let Src1Mod = FPVRegInputMods; + let Src2Mod = FPVRegInputMods; + let Outs64 = (outs VGPR_32:$vdst); let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, Src1Mod:$src1_modifiers, VRegSrc_32:$src1, @@ -77,6 +81,10 @@ class VOP3_VINTERP_F16 ArgVT> : VOPProfile { let HasOpSel = 1; let HasModifiers = 
1; + let Src0Mod = FPVRegInputMods; + let Src1Mod = FPVRegInputMods; + let Src2Mod = FPVRegInputMods; + let Outs64 = (outs VGPR_32:$vdst); let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0, Src1Mod:$src1_modifiers, VRegSrc_32:$src1, diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index bb2b918837c6e..fdbdfe5c47f9e 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -660,12 +660,9 @@ let SubtargetPredicate = isGFX11Only in defm : IMAD32_Pats; def VOP3_PERMLANE_Profile : VOP3_Profile, VOP3_OPSEL> { - let Src0RC64 = VRegSrc_32; - let Src1RC64 = SCSrc_b32; - let Src2RC64 = SCSrc_b32; let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0, - IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1, - IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2, + IntOpSelMods:$src1_modifiers, SSrc_b32:$src1, + IntOpSelMods:$src2_modifiers, SSrc_b32:$src2, VGPR_32:$vdst_in, op_sel0:$op_sel); let HasClamp = 0; let HasExtVOP3DPP = 0; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 679f6db7453fd..78c4455fc9848 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -168,7 +168,7 @@ multiclass MadFmaMixPats; def : GCNPat < @@ -181,7 +181,7 @@ multiclass MadFmaMixPats; def : GCNPat < diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index c84fe4d661974..b822f15ed193b 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -6855,25 +6855,25 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, // If one of the operands is a constant vector zero, attempt to fold the // comparison to a specialized compare-against-zero form. 
- SDValue SingleOp; - if (ISD::isBuildVectorAllZeros(Op1.getNode())) - SingleOp = Op0; - else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { + if (ISD::isBuildVectorAllZeros(Op0.getNode()) && + (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ || + Opc == ARMCC::NE)) { if (Opc == ARMCC::GE) Opc = ARMCC::LE; else if (Opc == ARMCC::GT) Opc = ARMCC::LT; - SingleOp = Op1; + std::swap(Op0, Op1); } SDValue Result; - if (SingleOp.getNode()) { - Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp, + if (ISD::isBuildVectorAllZeros(Op1.getNode()) && + (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE || + Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ)) + Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0, DAG.getConstant(Opc, dl, MVT::i32)); - } else { + else Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, DAG.getConstant(Opc, dl, MVT::i32)); - } Result = DAG.getSExtOrTrunc(Result, dl, VT); diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index 1961444be8cfb..e49c162cdb781 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -241,7 +241,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( } // end namespace -INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "LoongArch-prera-expand-pseudo", +INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false) namespace llvm { diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 0e2f57551f334..3767fc1d793ba 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -284,7 +284,11 @@ def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>; def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src), (MOVFR2GR_S 
FPR32:$src)>; // int -> f32 -def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +def : Pat<(f32 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), + (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +// uint -> f32 +def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))), + (FFINT_S_W (MOVGR2FR_W GPR:$src))>; } // Predicates = [HasBasicF, IsLA64] // FP Rounding diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index d061b6426e244..44a80054f0a90 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -288,13 +288,11 @@ def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>; /// Convert int to FP let Predicates = [HasBasicD, IsLA64] in { +def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_L (MOVGR2FR_D GPR:$src))>; def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>; -def : Pat<(f64 (uint_to_fp (i64 (zexti32 (i64 GPR:$src))))), - (FFINT_D_W (MOVGR2FR_W GPR:$src))>; - def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>; } // Predicates = [HasBasicD, IsLA64] let Predicates = [HasBasicD, IsLA32] in { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index 415ca4d871cda..8ba1f9c1b27f6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -213,6 +213,12 @@ bool LoongArchDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { Val = N.getOperand(0); return true; } + if (N.getOpcode() == LoongArchISD::BSTRPICK && + N.getConstantOperandVal(1) < UINT64_C(0X1F) && + N.getConstantOperandVal(2) == UINT64_C(0)) { + Val = N; + return true; + } MVT VT = N.getSimpleValueType(); if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { Val = N; 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 6f477413636a9..49e8ce02abccd 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -21,6 +21,7 @@ #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicsLoongArch.h" #include "llvm/Support/Debug.h" @@ -160,7 +161,12 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::MUL_I128, nullptr); setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); - setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand); + if ((Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD())) { + setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); + } // Compute derived properties from the register classes. 
computeRegisterProperties(STI.getRegisterInfo()); @@ -220,6 +226,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, return lowerBITCAST(Op, DAG); case ISD::UINT_TO_FP: return lowerUINT_TO_FP(Op, DAG); + case ISD::SINT_TO_FP: + return lowerSINT_TO_FP(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::FRAMEADDR: @@ -302,19 +310,61 @@ SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op, SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD() && "unexpected target features"); + + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + if (Op0->getOpcode() == ISD::AND) { + auto *C = dyn_cast(Op0.getOperand(1)); + if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF)) + return Op; + } + + if (Op0->getOpcode() == LoongArchISD::BSTRPICK && + Op0.getConstantOperandVal(1) < UINT64_C(0X1F) && + Op0.getConstantOperandVal(2) == UINT64_C(0)) + return Op; + + if (Op0.getOpcode() == ISD::AssertZext && + dyn_cast(Op0.getOperand(1))->getVT().bitsLT(MVT::i32)) + return Op; + + EVT OpVT = Op0.getValueType(); + EVT RetVT = Op.getValueType(); + RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT); + MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); + SDValue Chain = SDValue(); + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); + return Result; +} + +SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget.is64Bit() && Subtarget.hasBasicF() && + !Subtarget.hasBasicD() && "unexpected target features"); SDLoc DL(Op); - auto &TLI = DAG.getTargetLoweringInfo(); - SDValue Tmp1, Tmp2; - SDValue Op1 = Op.getOperand(0); - if (Op1->getOpcode() == ISD::AssertZext || - Op1->getOpcode() == ISD::AssertSext) + SDValue Op0 = Op.getOperand(0); + + if ((Op0.getOpcode() == ISD::AssertSext || + Op0.getOpcode() == 
ISD::SIGN_EXTEND_INREG) && + dyn_cast(Op0.getOperand(1))->getVT().bitsLE(MVT::i32)) return Op; - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0)); - SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc); - SDNode *N = Res.getNode(); - TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG); - return Tmp1; + + EVT OpVT = Op0.getValueType(); + EVT RetVT = Op.getValueType(); + RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT); + MakeLibCallOptions CallOptions; + CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true); + SDValue Chain = SDValue(); + SDValue Result; + std::tie(Result, Chain) = + makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain); + return Result; } SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 1e411fb34f727..358da7feb20b6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -171,6 +171,7 @@ class LoongArchTargetLowering : public TargetLowering { SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/llvm/lib/Target/Mips/Mips16HardFloat.cpp index 419f0ac1a8a72..8b928e36b9da5 100644 --- a/llvm/lib/Target/Mips/Mips16HardFloat.cpp +++ b/llvm/lib/Target/Mips/Mips16HardFloat.cpp @@ -12,6 +12,7 @@ #include "MipsTargetMachine.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/ModRef.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Support/Debug.h" @@ -409,7 
+410,8 @@ static bool fixupFPReturnAndCall(Function &F, Module *M, // functions will take place. // A = A.addFnAttribute(C, "__Mips16RetHelper"); - A = A.addFnAttribute(C, Attribute::ReadNone); + A = A.addFnAttribute( + C, Attribute::getWithMemoryEffects(C, MemoryEffects::none())); A = A.addFnAttribute(C, Attribute::NoInline); FunctionCallee F = (M->getOrInsertFunction(Name, A, MyVoid, T)); CallInst::Create(F, Params, "", &I); diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 915f689ac688c..e7f7c0cd32ed8 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -29,7 +29,7 @@ using namespace llvm; -DEFINE_PPC_REGCLASSES; +DEFINE_PPC_REGCLASSES // Evaluate an expression containing condition register // or condition register field symbols. Returns positive @@ -291,6 +291,26 @@ struct PPCOperand : public MCParsedAsmOperand { return (unsigned) Imm.Val; } + unsigned getDMRROWReg() const { + assert(isDMRROWRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + + unsigned getDMRROWpReg() const { + assert(isDMRROWpRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + + unsigned getDMRReg() const { + assert(isDMRRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + + unsigned getDMRpReg() const { + assert(isDMRpRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + unsigned getVSRpEvenReg() const { assert(isVSRpEvenRegNumber() && "Invalid access!"); return (unsigned) Imm.Val >> 1; @@ -391,6 +411,18 @@ struct PPCOperand : public MCParsedAsmOperand { bool isACCRegNumber() const { return Kind == Immediate && isUInt<3>(getImm()); } + bool isDMRROWRegNumber() const { + return Kind == Immediate && isUInt<6>(getImm()); + } + bool isDMRROWpRegNumber() const { + return Kind == Immediate && isUInt<5>(getImm()); + } + bool isDMRRegNumber() const { + return Kind == Immediate && 
isUInt<3>(getImm()); + } + bool isDMRpRegNumber() const { + return Kind == Immediate && isUInt<2>(getImm()); + } bool isVSRpEvenRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()) && ((getImm() & 1) == 0); } @@ -507,6 +539,36 @@ struct PPCOperand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createReg(ACCRegs[getACCReg()])); } + void addRegDMRROWRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(DMRROWRegs[getDMRROWReg()])); + } + + void addRegDMRROWpRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(DMRROWpRegs[getDMRROWpReg()])); + } + + void addRegDMRRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(DMRRegs[getDMRReg()])); + } + + void addRegDMRpRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(DMRpRegs[getDMRpReg()])); + } + + void addRegWACCRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(WACCRegs[getACCReg()])); + } + + void addRegWACC_HIRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(WACC_HIRegs[getACCReg()])); + } + void addRegVSRpRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(VSRpRegs[getVSRpEvenReg()])); @@ -1218,6 +1280,27 @@ bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) { } else if (Name.startswith_insensitive("cr") && !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) { RegNo = CRRegs[IntVal]; + } else if (Name.startswith_insensitive("acc") && + !Name.substr(3).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = 
ACCRegs[IntVal]; + } else if (Name.startswith_insensitive("wacc_hi") && + !Name.substr(7).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = WACC_HIRegs[IntVal]; + } else if (Name.startswith_insensitive("wacc") && + !Name.substr(4).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = WACCRegs[IntVal]; + } else if (Name.startswith_insensitive("dmrrowp") && + !Name.substr(7).getAsInteger(10, IntVal) && IntVal < 32) { + RegNo = DMRROWpRegs[IntVal]; + } else if (Name.startswith_insensitive("dmrrow") && + !Name.substr(6).getAsInteger(10, IntVal) && IntVal < 64) { + RegNo = DMRROWRegs[IntVal]; + } else if (Name.startswith_insensitive("dmrp") && + !Name.substr(4).getAsInteger(10, IntVal) && IntVal < 4) { + RegNo = DMRpRegs[IntVal]; + } else if (Name.startswith_insensitive("dmr") && + !Name.substr(3).getAsInteger(10, IntVal) && IntVal < 8) { + RegNo = DMRRegs[IntVal]; } else return true; getParser().Lex(); diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index d3d720054f16a..21fee2441f32a 100644 --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -17,7 +17,7 @@ using namespace llvm; -DEFINE_PPC_REGCLASSES; +DEFINE_PPC_REGCLASSES #define DEBUG_TYPE "ppc-disassembler" @@ -187,6 +187,45 @@ static DecodeStatus DecodeACCRCRegisterClass(MCInst &Inst, uint64_t RegNo, return decodeRegisterClass(Inst, RegNo, ACCRegs); } +static DecodeStatus DecodeWACCRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, WACCRegs); +} + +static DecodeStatus DecodeWACC_HIRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, WACC_HIRegs); +} + +// TODO: Make this function static when the register class is used by a new +// instruction.
+DecodeStatus DecodeDMRROWRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, DMRROWRegs); +} + +static DecodeStatus DecodeDMRROWpRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, DMRROWpRegs); +} + +static DecodeStatus DecodeDMRRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, DMRRegs); +} + +// TODO: Make this function static when the register class is used by a new +// instruction. +DecodeStatus DecodeDMRpRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, DMRpRegs); +} + static DecodeStatus DecodeVSRpRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 3ca6f394f60b6..e4521aebad7ef 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -136,6 +136,17 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) { X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \ } +#define PPC_REGS0_63(X) \ + { \ + X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \ + X##12, X##13, X##14, X##15, X##16, X##17, X##18, X##19, X##20, X##21, \ + X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31, \ + X##32, X##33, X##34, X##35, X##36, X##37, X##38, X##39, X##40, X##41, \ + X##42, X##43, X##44, X##45, X##46, X##47, X##48, X##49, X##50, X##51, \ + X##52, X##53, X##54, X##55, X##56, X##57, X##58, X##59, X##60, X##61, \ + X##62, X##63 \ + } + #define PPC_REGS_NO0_31(Z, X) \ { \ Z, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, 
X##11, \ @@ -155,6 +166,16 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) { HI##28, HI##29, HI##30, HI##31 \ } +#define PPC_REGS0_7(X) \ + { \ + X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7 \ + } + +#define PPC_REGS0_3(X) \ + { \ + X##0, X##1, X##2, X##3 \ + } + using llvm::MCPhysReg; #define DEFINE_PPC_REGCLASSES \ @@ -185,5 +206,13 @@ using llvm::MCPhysReg; PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \ static const MCPhysReg CRRegs[8] = PPC_REGS0_7(PPC::CR); \ - static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC) + static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC); \ + static const MCPhysReg WACCRegs[8] = PPC_REGS0_7(PPC::WACC); \ + static const MCPhysReg WACC_HIRegs[8] = PPC_REGS0_7(PPC::WACC_HI); \ + static const MCPhysReg DMRROWpRegs[32] = PPC_REGS0_31(PPC::DMRROWp); \ + static const MCPhysReg DMRROWRegs[64] = PPC_REGS0_63(PPC::DMRROW); \ + static const MCPhysReg DMRRegs[8] = PPC_REGS0_7(PPC::DMR); \ + static const MCPhysReg DMRpRegs[4] = PPC_REGS0_3(PPC::DMRp); + + #endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 5320ae0da031a..87d62f1619bc3 100644 --- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -275,13 +275,9 @@ void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, // merge the two-predecessor loop header with its successor. If the // successor happens to be a header of nest loop, then we will have a header // which has more than 2 predecessors. 
- assert(std::find(ML->getHeader()->predecessors().begin(), - ML->getHeader()->predecessors().end(), - Exiting) != ML->getHeader()->predecessors().end() && + assert(llvm::is_contained(ML->getHeader()->predecessors(), Exiting) && "Loop latch is not loop header predecessor!"); - assert(std::find(ML->getHeader()->predecessors().begin(), - ML->getHeader()->predecessors().end(), - Preheader) != ML->getHeader()->predecessors().end() && + assert(llvm::is_contained(ML->getHeader()->predecessors(), Preheader) && "Loop preheader is not loop header predecessor!"); PHIMIB.addReg(ADDIDef).addMBB(Exiting); diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td new file mode 100644 index 0000000000000..63b77e46f01f4 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td @@ -0,0 +1,14 @@ +//===-- PPCInstrFuture.td - Future Instruction Set --------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions introduced for the Future CPU. +// +//===----------------------------------------------------------------------===// + + diff --git a/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td new file mode 100644 index 0000000000000..4da2969857d55 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td @@ -0,0 +1,116 @@ +//===-- PPCInstrFutureMMA.td - Future Instruction Set ------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the instructions introduced for the Future CPU for MMA. 
+// +//===----------------------------------------------------------------------===// + +class XX3Form_AT3_XABp5_P1 opcode, bits<8> xo, dag OOL, dag IOL, + string asmstr, list pattern> + : I { + bits<3> AT; + bits<5> XAp; + bits<5> XBp; + bits<1> P; + + let Pattern = pattern; + + let Inst{6-8} = AT{2-0}; + let Inst{9-10} = 0; + let Inst{11-14} = XAp{3-0}; + let Inst{15} = P; + let Inst{16-19} = XBp{3-0}; + let Inst{20} = 0; + let Inst{21-28} = xo; + let Inst{29} = XAp{4}; + let Inst{30} = XBp{4}; + let Inst{31} = 0; +} + +class XX2Form_AT3_XBp5_P2 opcode, bits<9> xo, dag OOL, dag IOL, + string asmstr, list pattern> + : I { + bits<3> AT; + bits<5> XBp; + bits<2> P; + + let Pattern = pattern; + + let Inst{6-8} = AT{2-0}; + let Inst{9-14} = 0; + let Inst{15} = P{0}; + let Inst{16-19} = XBp{3-0}; + let Inst{20} = P{1}; + let Inst{21-29} = xo; + let Inst{30} = XBp{4}; + let Inst{31} = 0; +} + +class XForm_ATB3 opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL, + string asmstr, list pattern> + : I { + bits<3> AT; + bits<3> AB; + + let Pattern = pattern; + + let Inst{6-8} = AT{2-0}; + let Inst{9-10} = 0; + let Inst{11-15} = o; + let Inst{16-18} = AB{2-0}; + let Inst{19-20} = 0; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +let Predicates = [IsISAFuture] in { + def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226, + (outs vsrprc:$XAp, vsrprc:$XBp), + (ins wacc:$AT), + "dmxxextfdmr512 $AT, $XAp, $XBp, 0", []> { + let P = 0; + } + + def DMXXEXTFDMR512_HI : XX3Form_AT3_XABp5_P1<60, 226, + (outs vsrprc:$XAp, vsrprc:$XBp), + (ins wacc_hi:$AT), + "dmxxextfdmr512 $AT, $XAp, $XBp, 1", []> { + let P = 1; + } + + def DMXXINSTFDMR512 : XX3Form_AT3_XABp5_P1<60, 234, (outs wacc:$AT), + (ins vsrprc:$XAp, vsrprc:$XBp), + "dmxxinstfdmr512 $AT, $XAp, $XBp, 0", []> { + let P = 0; + } + + def DMXXINSTFDMR512_HI : XX3Form_AT3_XABp5_P1<60, 234, (outs wacc_hi:$AT), + (ins vsrprc:$XAp, vsrprc:$XBp), + "dmxxinstfdmr512 $AT, $XAp, $XBp, 1", []> { + let P = 1; + } + + def DMXXEXTFDMR256 : 
XX2Form_AT3_XBp5_P2<60, 484, (outs vsrprc:$XBp), + (ins dmrrowp:$AT, u2imm:$P), + "dmxxextfdmr256 $AT, $XBp, $P", []>; + + def DMXXINSTFDMR256 : XX2Form_AT3_XBp5_P2<60, 485, (outs dmrrowp:$AT), + (ins vsrprc:$XBp, u2imm:$P), + "dmxxinstfdmr256 $AT, $XBp, $P", []>; + + def DMMR : XForm_ATB3<31, 6, 177, (outs dmr:$AT), (ins dmr:$AB), + "dmmr $AT, $AB", []>; + + def DMXOR : XForm_ATB3<31, 7, 177, (outs dmr:$AT), (ins dmr:$ATi, dmr:$AB), + "dmxor $AT, $AB", []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + + def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins), + "dmsetdmrz $AT", NoItinerary, []>; +} diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 965bdaaa8ecc5..7a7dd2f30e943 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -613,7 +613,7 @@ void PPCInstrInfo::finalizeInsInstrs( } bool PPCInstrInfo::shouldReduceRegisterPressure( - MachineBasicBlock *MBB, RegisterClassInfo *RegClassInfo) const { + const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const { if (!EnableFMARegPressureReduction) return false; @@ -635,10 +635,11 @@ bool PPCInstrInfo::shouldReduceRegisterPressure( return false; const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); + const MachineFunction *MF = MBB->getParent(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); - auto GetMBBPressure = [&](MachineBasicBlock *MBB) -> std::vector { + auto GetMBBPressure = + [&](const MachineBasicBlock *MBB) -> std::vector { RegionPressure Pressure; RegPressureTracker RPTracker(Pressure); @@ -646,10 +647,7 @@ bool PPCInstrInfo::shouldReduceRegisterPressure( RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(), /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); - for (MachineBasicBlock::iterator MII = MBB->instr_end(), - MIE = MBB->instr_begin(); - MII != MIE; --MII) 
{ - MachineInstr &MI = *std::prev(MII); + for (const auto &MI : reverse(*MBB)) { if (MI.isDebugValue() || MI.isDebugLabel()) continue; RegisterOperands RegOpers; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 4c720e251f15c..9f150be7f746c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -471,9 +471,9 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// when the register pressure is high for one BB. /// Return true if register pressure for \p MBB is high and ABI is supported /// to reduce register pressure. Otherwise return false. - bool - shouldReduceRegisterPressure(MachineBasicBlock *MBB, - RegisterClassInfo *RegClassInfo) const override; + bool shouldReduceRegisterPressure( + const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo) const override; /// Fixup the placeholders we put in genAlternativeCodeSequence() for /// MachineCombiner. diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index ea4ef12bf21d2..8c8891b4b05cd 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3278,6 +3278,8 @@ def : Pat<(not i1:$in), // Prefixed instructions may require access to the above defs at a later // time so we include this after the def. include "PPCInstrP10.td" +include "PPCInstrFutureMMA.td" +include "PPCInstrFuture.td" include "PPCInstrMMA.td" // Patterns for arithmetic i1 operations. 
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h index aaa841fffa1b3..fea1a3afab1c2 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -184,7 +184,33 @@ class PPCRegisterInfo : public PPCGenRegisterInfo { return RegName + 2; } return RegName + 1; - case 'c': if (RegName[1] == 'r') return RegName + 2; + case 'c': + if (RegName[1] == 'r') + return RegName + 2; + break; + case 'w': + // For wacc and wacc_hi + if (RegName[1] == 'a' && RegName[2] == 'c' && RegName[3] == 'c') { + if (RegName[4] == '_') + return RegName + 7; + else + return RegName + 4; + } + break; + case 'd': + // For dmr, dmrp, dmrrow, dmrrowp + if (RegName[1] == 'm' && RegName[2] == 'r') { + if (RegName[3] == 'r' && RegName[4] == 'o' && RegName[5] == 'w' && + RegName[6] == 'p') + return RegName + 7; + else if (RegName[3] == 'r' && RegName[4] == 'o' && RegName[5] == 'w') + return RegName + 6; + else if (RegName[3] == 'p') + return RegName + 4; + else + return RegName + 3; + } + break; } return RegName; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 49b26cd160608..32f8163a38828 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -446,6 +446,7 @@ def G8pRC : } include "PPCRegisterInfoMMA.td" +include "PPCRegisterInfoDMR.td" //===----------------------------------------------------------------------===// // PowerPC Operand Definitions. 
@@ -1013,3 +1014,48 @@ def acc : RegisterOperand { def uacc : RegisterOperand { let ParserMatchClass = PPCRegACCRCAsmOperand; } + +// DMR Register Operands +def PPCRegDMRROWRCAsmOperand : AsmOperandClass { + let Name = "RegDMRROWRC"; + let PredicateMethod = "isDMRROWRegNumber"; +} + +def dmrrow : RegisterOperand { + let ParserMatchClass = PPCRegDMRROWRCAsmOperand; +} + +def PPCRegDMRROWpRCAsmOperand : AsmOperandClass { + let Name = "RegDMRROWpRC"; + let PredicateMethod = "isDMRROWpRegNumber"; +} + +def dmrrowp : RegisterOperand { + let ParserMatchClass = PPCRegDMRROWpRCAsmOperand; +} + +def wacc : RegisterOperand { + let ParserMatchClass = PPCRegACCRCAsmOperand; +} + +def wacc_hi : RegisterOperand { + let ParserMatchClass = PPCRegACCRCAsmOperand; +} + +def PPCRegDMRRCAsmOperand : AsmOperandClass { + let Name = "RegDMRRC"; + let PredicateMethod = "isDMRRegNumber"; +} + +def dmr : RegisterOperand { + let ParserMatchClass = PPCRegDMRRCAsmOperand; +} + +def PPCRegDMRpRCAsmOperand : AsmOperandClass { + let Name = "RegDMRpRC"; + let PredicateMethod = "isDMRpRegNumber"; +} + +def dmrp : RegisterOperand { + let ParserMatchClass = PPCRegDMRpRCAsmOperand; +} diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfoDMR.td b/llvm/lib/Target/PowerPC/PPCRegisterInfoDMR.td new file mode 100644 index 0000000000000..1c3e7621825bd --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfoDMR.td @@ -0,0 +1,164 @@ +//===- PPCRegisterInfoDMR.td - The PowerPC Register File *- tablegen -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Register info specific to Power PC Dense Math Registers(DMR). +// +// Register classes in this file are related to the Dense Math Registers (DMR). 
+// There are a total of 8 DMR registers numbered 0 to 7. +// The 4 different views of each DMR register. +// +// [ DMR0 ] +// | WACC0 | WACC_HI0 | +// | DMRROWp0 | DMRROWp1 | DMRROWp2 | DMRROWp3 | +// |DMRROW0|DMRROW1|DMRROW2|DMRROW3|DMRROW4|DMRROW5|DMRROW6|DMRROW7| +// [128bits|128bits|128bits|128bits|128bits|128bits|128bits|128bits] +// +// In addition to the above classes two consecutive DMR registers make a DMR +// DMR pair (DMRp) that is 2048 bits. +//===----------------------------------------------------------------------===// + +let Namespace = "PPC" in { +def sub_dmrrow0 : SubRegIndex<128>; +def sub_dmrrow1 : SubRegIndex<128, 128>; +def sub_dmrrowp0 : SubRegIndex<256>; +def sub_dmrrowp1 : SubRegIndex<256, 256>; +def sub_wacc_lo : SubRegIndex<512>; +def sub_wacc_hi : SubRegIndex<512, 512>; +def sub_dmr0 : SubRegIndex<1024>; +def sub_dmr1 : SubRegIndex<1024, 1024>; +} + +// A single row in a DMR register. +// There are 8 128 bit rows in each DMR register and 8 DMR registers so that +// makes 64 DMRROW registers in total. +class DMRROW num, string n> : PPCReg { + let HWEncoding{5-0} = num; +} + +// A consecutive pair of DMR row registers. +class DMRROWp num, string n, list subregs> : PPCReg { + let HWEncoding{4-0} = num; + let SubRegs = subregs; +} + +// WACC - Wide ACC registers. Accumulator registers that are subregs of DMR. +// These ACC registers no longer include VSR regs as subregs. +class WACC num, string n, list subregs> : PPCReg { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +// High bits for the ACC registers. +// When the ACC register is used these bits are ignored. +// When the ACC register is the target, these bits are set to zero. 
+class WACC_HI num, string n, list subregs> : PPCReg { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +class DMR num, string n, list subregs> : PPCReg { + let HWEncoding{2-0} = num; + let SubRegs = subregs; +} + +class DMRp num, string n, list subregs> : PPCReg { + let HWEncoding{1-0} = num; + let SubRegs = subregs; +} + +// The DMR Row type registers are the lowest level of registers and have no +// subregs. +foreach Index = 0-63 in { + def DMRROW#Index : DMRROW, DwarfRegNum<[-1, -1]>; +} + +// DMRROW pairs are consecutive pairs. +// DMRROWp0 = DMRROW0, DMRROW1 +// DMRROWp1 = DMRROW2, DMRROW3 +// DMRROWp2 = DMRROW4, DMRROW5 +// etc... +let SubRegIndices = [sub_dmrrow0, sub_dmrrow1] in { + foreach Index = 0-31 in { + def DMRROWp#Index : DMRROWp("DMRROW"#!mul(Index, 2)), + !cast("DMRROW"#!add(!mul(Index, 2), 1))]>, DwarfRegNum<[-1, -1]>; + } +} + +let SubRegIndices = [sub_dmrrowp0, sub_dmrrowp1] in { + // WACC0 = DMRROWp0, DMRROWp1 + // WACC1 = DMRROWp4, DMRROWp5 + // WACC2 = DMRROWp8, DMRROWp9 + // etc... + foreach Index = 0-7 in { + def WACC#Index : WACC("DMRROWp"#!mul(Index, 4)), + !cast("DMRROWp"#!add(!mul(Index, 4), 1))]>, DwarfRegNum<[-1, -1]>; + } + + // WACC_HI0 = DMRROWp2, DMRROWp3 + // WACC_HI1 = DMRROWp6, DMRROWp7 + // WACC_HI2 = DMRROWp10, DMRROWp11 + // etc... + foreach Index = 0-7 in { + def WACC_HI#Index : WACC_HI("DMRROWp"#!add(!mul(Index, 4), 2)), + !cast("DMRROWp"#!add(!mul(Index, 4), 3))]>, DwarfRegNum<[-1, -1]>; + } +} + +// DMR0 = WACC0, WACC_HI0 +// DMR1 = WACC1, WACC_HI1 +// DMR2 = WACC2, WACC_HI2 +// etc... 
+let SubRegIndices = [sub_wacc_lo, sub_wacc_hi] in { + foreach Index = 0-7 in { + def DMR#Index : DMR("WACC"#Index), !cast("WACC_HI"#Index)]>, DwarfRegNum<[-1, -1]>; + } +} + +// DMRp0 = DMR0, DMR1 +// DMRp1 = DMR2, DMR3 +// DMRp2 = DMR4, DMR5 +// DMRp3 = DMR6, DMR7 +let SubRegIndices = [sub_dmr0, sub_dmr1] in { + def DMRp0 : DMRp<0, "dmrp0", [DMR0, DMR1]>, DwarfRegNum<[-1, -1]>; + def DMRp1 : DMRp<1, "dmrp1", [DMR2, DMR3]>, DwarfRegNum<[-1, -1]>; + def DMRp2 : DMRp<2, "dmrp2", [DMR4, DMR5]>, DwarfRegNum<[-1, -1]>; + def DMRp3 : DMRp<3, "dmrp3", [DMR6, DMR7]>, DwarfRegNum<[-1, -1]>; +} + +def DMRROWRC : RegisterClass<"PPC", [v128i1], 128, + (add (sequence "DMRROW%u", 0, 63))> { + let Size = 128; +} + +def DMRROWpRC : RegisterClass<"PPC", [v256i1], 128, + (add (sequence "DMRROWp%u", 0, 31))> { + let Size = 256; +} + +def WACCRC : RegisterClass<"PPC", [v512i1], 128, + (add (sequence "WACC%u", 0, 7))> { + let Size = 512; +} + +def WACC_HIRC : RegisterClass<"PPC", [v512i1], 128, + (add (sequence "WACC_HI%u", 0, 7))> { + let Size = 512; +} + +def DMRRC : RegisterClass<"PPC", [v1024i1], 128, + (add (sequence "DMR%u", 0, 7))> { + let Size = 1024; +} + +def DMRpRC : RegisterClass<"PPC", [v2048i1], 128, + (add DMRp0, DMRp1, DMRp2, DMRp3)> { + let Size = 2048; +} diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td index d350111717159..e9f4daa62de3c 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -38,10 +38,11 @@ def P9Model : SchedMachineModel { let CompleteModel = 1; - // Do not support SPE (Signal Processing Engine), prefixed instructions on - // Power 9, paired vector mem ops, MMA, PC relative mem ops, or instructions - // introduced in ISA 3.1. 
- let UnsupportedFeatures = [HasSPE, PrefixInstrs, PairedVectorMemops, MMA, + // Do not support SPE (Signal Processing Engine) or prefixed instructions on + // Power 9, or MMA, or paired vector mem ops, or PC relative mem ops, or + // instructions introduced after ISA 3.0. + let UnsupportedFeatures = [HasSPE, PrefixInstrs, MMA, + PairedVectorMemops, PCRelativeMemops, IsISA3_1, IsISAFuture]; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 1b8a4c557afa2..4f07d9ece8f39 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Support/TargetParser.h" namespace llvm { class RISCVSubtarget; @@ -325,11 +326,6 @@ enum NodeType : unsigned { }; } // namespace RISCVISD -namespace RISCV { -// We use 64 bits as the known part in the scalable vector types. 
-static constexpr unsigned RVVBitsPerBlock = 64; -} // namespace RISCV - class RISCVTargetLowering : public TargetLowering { const RISCVSubtarget &Subtarget; diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 18b31f85bfdb4..f8de46ea7cfff 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -125,12 +125,14 @@ def lookupRISCVOpcodeByName : SearchIndex { } def OPC_LOAD : RISCVOpcode<"LOAD", 0b0000011>; def OPC_LOAD_FP : RISCVOpcode<"LOAD_FP", 0b0000111>; +def OPC_CUSTOM_0 : RISCVOpcode<"CUSTOM_0", 0b0001011>; def OPC_MISC_MEM : RISCVOpcode<"MISC_MEM", 0b0001111>; def OPC_OP_IMM : RISCVOpcode<"OP_IMM", 0b0010011>; def OPC_AUIPC : RISCVOpcode<"AUIPC", 0b0010111>; def OPC_OP_IMM_32 : RISCVOpcode<"OP_IMM_32", 0b0011011>; def OPC_STORE : RISCVOpcode<"STORE", 0b0100011>; def OPC_STORE_FP : RISCVOpcode<"STORE_FP", 0b0100111>; +def OPC_CUSTOM_1 : RISCVOpcode<"CUSTOM_1", 0b0101011>; def OPC_AMO : RISCVOpcode<"AMO", 0b0101111>; def OPC_OP : RISCVOpcode<"OP", 0b0110011>; def OPC_LUI : RISCVOpcode<"LUI", 0b0110111>; @@ -141,10 +143,12 @@ def OPC_NMSUB : RISCVOpcode<"NMSUB", 0b1001011>; def OPC_NMADD : RISCVOpcode<"NMADD", 0b1001111>; def OPC_OP_FP : RISCVOpcode<"OP_FP", 0b1010011>; def OPC_OP_V : RISCVOpcode<"OP_V", 0b1010111>; +def OPC_CUSTOM_2 : RISCVOpcode<"CUSTOM_2", 0b1011011>; def OPC_BRANCH : RISCVOpcode<"BRANCH", 0b1100011>; def OPC_JALR : RISCVOpcode<"JALR", 0b1100111>; def OPC_JAL : RISCVOpcode<"JAL", 0b1101111>; def OPC_SYSTEM : RISCVOpcode<"SYSTEM", 0b1110011>; +def OPC_CUSTOM_3 : RISCVOpcode<"CUSTOM_3", 0b1111011>; class RVInst pattern, InstFormat format> diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 65e36e0aa3b8c..1b4813720d97b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -402,97 +402,124 @@ multiclass VIndexLoadStore EEWList> { multiclass 
VALU_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + Sched<[WriteVIALUX_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; + Sched<[WriteVIALUI_UpperBound, ReadVIALUV_UpperBound, + ReadVMask]>; } multiclass VALU_IV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + Sched<[WriteVIALUX_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUX_UpperBound, ReadVMask]>; } multiclass VALU_IV_X_I funct6, Operand optype = simm5, string vw = "v"> { def X : VALUVX, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUX, ReadVMask]>; + Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound, + ReadVIALUX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; + Sched<[WriteVIALUI_UpperBound, ReadVIALUV_UpperBound, + ReadVMask]>; } multiclass VALU_MV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; + Sched<[WriteVIWALUV_UpperBound, ReadVIWALUV_UpperBound, + ReadVIWALUV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; + Sched<[WriteVIWALUX_UpperBound, ReadVIWALUV_UpperBound, + ReadVIWALUX_UpperBound, ReadVMask]>; } multiclass VMAC_MV_V_X funct6, string vw = "v"> { def V : VALUrVV, - Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>; + Sched<[WriteVIMulAddV_UpperBound, ReadVIMulAddV_UpperBound, + ReadVIMulAddV_UpperBound, ReadVMask]>; def X : VALUrVX, - Sched<[WriteVIMulAddX, 
ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>; + Sched<[WriteVIMulAddX_UpperBound, ReadVIMulAddV_UpperBound, + ReadVIMulAddX_UpperBound, ReadVMask]>; } multiclass VWMAC_MV_V_X funct6, string vw = "v"> { def V : VALUrVV, - Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>; + Sched<[WriteVIWMulAddV_UpperBound, ReadVIWMulAddV_UpperBound, + ReadVIWMulAddV_UpperBound, ReadVMask]>; def X : VALUrVX, - Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; + Sched<[WriteVIWMulAddX_UpperBound, ReadVIWMulAddV_UpperBound, + ReadVIWMulAddX_UpperBound, ReadVMask]>; } multiclass VWMAC_MV_X funct6, string vw = "v"> { def X : VALUrVX, - Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; + Sched<[WriteVIWMulAddX_UpperBound, ReadVIWMulAddV_UpperBound, + ReadVIWMulAddX_UpperBound, ReadVMask]>; } multiclass VALU_MV_VS2 funct6, bits<5> vs1> { def "" : VALUVs2, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + Sched<[WriteVExtV_UpperBound, ReadVExtV_UpperBound, + ReadVMask]>; } multiclass VALUm_IV_V_X_I funct6> { def VM : VALUmVV, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; + Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUV_UpperBound, ReadVMask]>; def XM : VALUmVX, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; + Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUX_UpperBound, ReadVMask]>; def IM : VALUmVI, - Sched<[WriteVICALUI, ReadVICALUV, ReadVMask]>; + Sched<[WriteVICALUI_UpperBound, ReadVICALUV_UpperBound, + ReadVMask]>; } multiclass VMRG_IV_V_X_I funct6> { def VM : VALUmVV, - Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; + Sched<[WriteVIMergeV_UpperBound, ReadVIMergeV_UpperBound, + ReadVIMergeV_UpperBound, ReadVMask]>; def XM : VALUmVX, - Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; + Sched<[WriteVIMergeX_UpperBound, ReadVIMergeV_UpperBound, + ReadVIMergeX_UpperBound, ReadVMask]>; def IM : VALUmVI, - Sched<[WriteVIMergeI, 
ReadVIMergeV, ReadVMask]>; + Sched<[WriteVIMergeI_UpperBound, ReadVIMergeV_UpperBound, + ReadVMask]>; } multiclass VALUm_IV_V_X funct6> { def VM : VALUmVV, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; + Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUV_UpperBound, ReadVMask]>; def XM : VALUmVX, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; + Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUX_UpperBound, ReadVMask]>; } multiclass VALUNoVm_IV_V_X_I funct6, Operand optype = simm5> { def V : VALUVVNoVm, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV]>; + Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUV_UpperBound]>; def X : VALUVXNoVm, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX]>; + Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound + , ReadVICALUX_UpperBound]>; def I : VALUVINoVm, - Sched<[WriteVICALUI, ReadVICALUV]>; + Sched<[WriteVICALUI_UpperBound, ReadVICALUV_UpperBound]>; } multiclass VALUNoVm_IV_V_X funct6> { def V : VALUVVNoVm, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV]>; + Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUV_UpperBound]>; def X : VALUVXNoVm, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX]>; + Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound, + ReadVICALUX_UpperBound]>; } multiclass VALU_FV_V_F funct6, string vw = "v"> { @@ -675,64 +702,83 @@ multiclass VMIOT_MV_V funct6, bits<5> vs1> { multiclass VSHT_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { def V : VALUVV, - Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>; + Sched<[WriteVShiftV_UpperBound, ReadVShiftV_UpperBound, + ReadVShiftV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>; + Sched<[WriteVShiftX_UpperBound, ReadVShiftV_UpperBound, + ReadVShiftX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>; + Sched<[WriteVShiftI_UpperBound, 
ReadVShiftV_UpperBound, + ReadVMask]>; } multiclass VNSHT_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { def V : VALUVV, - Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>; + Sched<[WriteVNShiftV_UpperBound, ReadVNShiftV_UpperBound, + ReadVNShiftV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>; + Sched<[WriteVNShiftX_UpperBound, ReadVNShiftV_UpperBound, + ReadVNShiftX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>; + Sched<[WriteVNShiftI_UpperBound, ReadVNShiftV_UpperBound, + ReadVMask]>; } multiclass VCMP_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { def V : VALUVV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + Sched<[WriteVICmpX_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; + Sched<[WriteVICmpI_UpperBound, ReadVICmpV_UpperBound, + ReadVMask]>; } multiclass VCMP_IV_X_I funct6, Operand optype = simm5, string vw = "v"> { def X : VALUVX, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpX, ReadVMask]>; + Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpX_UpperBound, ReadVMask]>; def I : VALUVI, - Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; + Sched<[WriteVICmpI_UpperBound, ReadVICmpV_UpperBound, + ReadVMask]>; } multiclass VCMP_IV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + Sched<[WriteVICmpX_UpperBound, ReadVICmpV_UpperBound, + ReadVICmpX_UpperBound, ReadVMask]>; } multiclass VMUL_MV_V_X funct6, string vw = "v"> { def V 
: VALUVV, - Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>; + Sched<[WriteVIMulV_UpperBound, ReadVIMulV_UpperBound, + ReadVIMulV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>; + Sched<[WriteVIMulX_UpperBound, ReadVIMulV_UpperBound, + ReadVIMulX_UpperBound, ReadVMask]>; } multiclass VWMUL_MV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>; + Sched<[WriteVIWMulV_UpperBound, ReadVIWMulV_UpperBound, + ReadVIWMulV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>; + Sched<[WriteVIWMulX_UpperBound, ReadVIWMulV_UpperBound, + ReadVIWMulX_UpperBound, ReadVMask]>; } multiclass VDIV_MV_V_X funct6, string vw = "v"> { def V : VALUVV, - Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>; + Sched<[WriteVIDivV_UpperBound, ReadVIDivV_UpperBound, + ReadVIDivV_UpperBound, ReadVMask]>; def X : VALUVX, - Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>; + Sched<[WriteVIDivX_UpperBound, ReadVIDivV_UpperBound, + ReadVIDivX_UpperBound, ReadVMask]>; } multiclass VSALU_IV_V_X_I funct6, Operand optype = simm5, string vw = "v"> { @@ -1126,15 +1172,15 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1, // op vd, vs1 def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd), (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">, - Sched<[WriteVIMovV, ReadVIMovV]>; + Sched<[WriteVIMovV_UpperBound, ReadVIMovV_UpperBound]>; // op vd, rs1 def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd), (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">, - Sched<[WriteVIMovX, ReadVIMovX]>; + Sched<[WriteVIMovX_UpperBound, ReadVIMovX_UpperBound]>; // op vd, imm def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd), (ins simm5:$imm), "vmv.v.i", "$vd, $imm">, - Sched<[WriteVIMovI]>; + Sched<[WriteVIMovI_UpperBound]>; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 // Vector Fixed-Point Arithmetic Instructions diff --git 
a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 7c765dd3548bc..06169022a0fa5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1914,6 +1914,10 @@ multiclass VPseudoBinaryV_VV { defm _VV : VPseudoBinary; } +multiclass VPseudoBinaryV_VV_LMUL { + defm _VV : VPseudoBinary; +} + // Similar to VPseudoBinaryV_VV, but uses MxListF. multiclass VPseudoBinaryFV_VV { foreach m = MxListF in @@ -1941,6 +1945,10 @@ multiclass VPseudoBinaryV_VX { defm "_VX" : VPseudoBinary; } +multiclass VPseudoBinaryV_VX_LMUL { + defm "_VX" : VPseudoBinary; +} + multiclass VPseudoVSLD1_VX { foreach m = MxList in defm "_VX" : VPseudoBinary, @@ -1967,6 +1975,10 @@ multiclass VPseudoBinaryV_VI { defm _VI : VPseudoBinary; } +multiclass VPseudoBinaryV_VI_LMUL { + defm _VI : VPseudoBinary; +} + multiclass VPseudoVALU_MM { foreach m = MxList in let VLMul = m.value in { @@ -1988,10 +2000,14 @@ multiclass VPseudoBinaryW_VV mxlist = MxListW> { "@earlyclobber $rd">; } -multiclass VPseudoBinaryW_VX { - foreach m = MxListW in - defm "_VX" : VPseudoBinary; +multiclass VPseudoBinaryW_VV_LMUL { + defm _VV : VPseudoBinary; +} + +multiclass VPseudoBinaryW_VX_LMUL { + defm "_VX" : VPseudoBinary; } multiclass VPseudoBinaryW_VF { @@ -2011,9 +2027,15 @@ multiclass VPseudoBinaryW_WV mxlist = MxListW> { } } -multiclass VPseudoBinaryW_WX { - foreach m = MxListW in - defm "_WX" : VPseudoBinary; +multiclass VPseudoBinaryW_WV_LMUL { + defm _WV : VPseudoBinary; + defm _WV : VPseudoTiedBinary; +} + +multiclass VPseudoBinaryW_WX_LMUL { + defm "_WX" : VPseudoBinary; } multiclass VPseudoBinaryW_WF { @@ -2034,59 +2056,70 @@ multiclass VPseudoBinaryV_WV { !if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>; } +multiclass VPseudoBinaryV_WV_LMUL { + defm _WV : VPseudoBinary; +} + multiclass VPseudoBinaryV_WX { foreach m = MxListW in defm _WX : VPseudoBinary; } +multiclass VPseudoBinaryV_WX_LMUL { + defm _WX : 
VPseudoBinary; +} + multiclass VPseudoBinaryV_WI { foreach m = MxListW in defm _WI : VPseudoBinary; } +multiclass VPseudoBinaryV_WI_LMUL { + defm _WI : VPseudoBinary; +} + // For vadc and vsbc, the instruction encoding is reserved if the destination // vector register is v0. // For vadc and vsbc, CarryIn == 1 and CarryOut == 0 -multiclass VPseudoBinaryV_VM { - foreach m = MxList in - def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), - m.vrclass, m.vrclass, m, CarryIn, Constraint>; + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; } -multiclass VPseudoTiedBinaryV_VM { - foreach m = MxList in - def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : - VPseudoTiedBinaryCarryIn.R, m.vrclass)), - m.vrclass, m.vrclass, m, CarryIn, Constraint>; + def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" : + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, m.vrclass, m, CarryIn, Constraint>; } -multiclass VPseudoBinaryV_XM { - foreach m = MxList in - def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), - m.vrclass, GPR, m, CarryIn, Constraint>; + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; } -multiclass VPseudoTiedBinaryV_XM { - foreach m = MxList in - def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": - VPseudoTiedBinaryCarryIn.R, m.vrclass)), - m.vrclass, GPR, m, CarryIn, Constraint>; + def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, GPR, m, CarryIn, Constraint>; } multiclass VPseudoVMRG_FM { @@ -2104,41 +2137,48 @@ multiclass VPseudoVMRG_FM { } } -multiclass VPseudoBinaryV_IM { - foreach m = MxList in - def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : - VPseudoBinaryCarryIn.R, m.vrclass)), - m.vrclass, simm5, m, CarryIn, Constraint>; + def "_VI" # 
!if(CarryIn, "M", "") # "_" # m.MX : + VPseudoBinaryCarryIn.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; } -multiclass VPseudoTiedBinaryV_IM { - foreach m = MxList in - def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": - VPseudoTiedBinaryCarryIn.R, m.vrclass)), - m.vrclass, simm5, m, CarryIn, Constraint>; + def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU": + VPseudoTiedBinaryCarryIn.R, m.vrclass)), + m.vrclass, simm5, m, CarryIn, Constraint>; } multiclass VPseudoUnaryVMV_V_X_I { foreach m = MxList in { let VLMul = m.value in { - def "_V_" # m.MX : VPseudoUnaryNoDummyMask, - Sched<[WriteVIMovV, ReadVIMovV]>; - def "_X_" # m.MX : VPseudoUnaryNoDummyMask, - Sched<[WriteVIMovX, ReadVIMovX]>; - def "_I_" # m.MX : VPseudoUnaryNoDummyMask, - Sched<[WriteVIMovI]>; - def "_V_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, - Sched<[WriteVIMovV, ReadVIMovV]>; - def "_X_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, - Sched<[WriteVIMovX, ReadVIMovX]>; - def "_I_" # m.MX # "_TU": VPseudoUnaryNoDummyMaskTU, - Sched<[WriteVIMovI]>; + defvar mx = m.MX; + defvar WriteVIMovV_MX = !cast("WriteVIMovV_" # mx); + defvar WriteVIMovX_MX = !cast("WriteVIMovX_" # mx); + defvar WriteVIMovI_MX = !cast("WriteVIMovI_" # mx); + defvar ReadVIMovV_MX = !cast("ReadVIMovV_" # mx); + defvar ReadVIMovX_MX = !cast("ReadVIMovX_" # mx); + + let VLMul = m.value in { + def "_V_" # mx : VPseudoUnaryNoDummyMask, + Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>; + def "_X_" # mx : VPseudoUnaryNoDummyMask, + Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>; + def "_I_" # mx : VPseudoUnaryNoDummyMask, + Sched<[WriteVIMovI_MX]>; + def "_V_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>; + def "_X_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>; + def "_I_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU, + Sched<[WriteVIMovI_MX]>; + } } } } @@ -2204,15 +2244,19 @@ multiclass PseudoVEXT_VF2 { defvar constraints = "@earlyclobber $rd"; foreach m = 
MxListVF2 in { + defvar mx = m.MX; + defvar WriteVExtV_MX = !cast("WriteVExtV_" # mx); + defvar ReadVExtV_MX = !cast("ReadVExtV_" # mx); + let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_MASK" : + def "_" # mx : VPseudoUnaryNoMask, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_MASK" : VPseudoUnaryMaskTA, RISCVMaskedPseudo, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } } @@ -2221,15 +2265,19 @@ multiclass PseudoVEXT_VF4 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF4 in { + defvar mx = m.MX; + defvar WriteVExtV_MX = !cast("WriteVExtV_" # mx); + defvar ReadVExtV_MX = !cast("ReadVExtV_" # mx); + let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_MASK" : + def "_" # mx : VPseudoUnaryNoMask, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_MASK" : VPseudoUnaryMaskTA, RISCVMaskedPseudo, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } } @@ -2238,15 +2286,19 @@ multiclass PseudoVEXT_VF8 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF8 in { + defvar mx = m.MX; + defvar WriteVExtV_MX = !cast("WriteVExtV_" # mx); + defvar ReadVExtV_MX = !cast("ReadVExtV_" # mx); + let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; - def "_" # m.MX # "_TU": VPseudoUnaryNoMaskTU, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; 
- def "_" # m.MX # "_MASK" : + def "_" # mx : VPseudoUnaryNoMask, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_TU": VPseudoUnaryNoMaskTU, + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; + def "_" # mx # "_MASK" : VPseudoUnaryMaskTA, RISCVMaskedPseudo, - Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>; } } } @@ -2268,11 +2320,15 @@ multiclass VPseudoBinaryM_VV mxlist = MxList> { !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; } -multiclass VPseudoBinaryM_VX { - foreach m = MxList in - defm "_VX" : - VPseudoBinaryM; +multiclass VPseudoBinaryM_VV_LMUL { + defm _VV : VPseudoBinaryM; +} + +multiclass VPseudoBinaryM_VX { + defm "_VX" : + VPseudoBinaryM; } multiclass VPseudoBinaryM_VF { @@ -2283,10 +2339,9 @@ multiclass VPseudoBinaryM_VF { !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; } -multiclass VPseudoBinaryM_VI { - foreach m = MxList in - defm _VI : VPseudoBinaryM; +multiclass VPseudoBinaryM_VI { + defm _VI : VPseudoBinaryM; } multiclass VPseudoVGTR_VV_VX_VI { @@ -2309,12 +2364,21 @@ multiclass VPseudoVSALU_VV_VX_VI { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>; - defm "" : VPseudoBinaryV_VI, - Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVShiftV_MX = !cast("WriteVShiftV_" # mx); + defvar WriteVShiftX_MX = !cast("WriteVShiftX_" # mx); + defvar WriteVShiftI_MX = !cast("WriteVShiftI_" # mx); + defvar ReadVShiftV_MX = !cast("ReadVShiftV_" # mx); + defvar ReadVShiftX_MX = !cast("ReadVShiftX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVShiftV_MX, ReadVShiftV_MX, ReadVShiftV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVShiftX_MX, ReadVShiftV_MX, ReadVShiftX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI_LMUL, + Sched<[WriteVShiftI_MX, ReadVShiftV_MX, 
ReadVMask]>; + } } multiclass VPseudoVSSHT_VV_VX_VI { @@ -2327,12 +2391,21 @@ multiclass VPseudoVSSHT_VV_VX_VI { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; - defm "" : VPseudoBinaryV_VI, - Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast("WriteVIALUV_" # mx); + defvar WriteVIALUX_MX = !cast("WriteVIALUX_" # mx); + defvar WriteVIALUI_MX = !cast("WriteVIALUI_" # mx); + defvar ReadVIALUV_MX = !cast("ReadVIALUV_" # mx); + defvar ReadVIALUX_MX = !cast("ReadVIALUX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI_LMUL, + Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>; + } } multiclass VPseudoVSALU_VV_VX { @@ -2357,24 +2430,48 @@ multiclass VPseudoVAALU_VV_VX { } multiclass VPseudoVMINMAX_VV_VX { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICmpV_MX = !cast("WriteVICmpV_" # mx); + defvar WriteVICmpX_MX = !cast("WriteVICmpX_" # mx); + defvar ReadVICmpV_MX = !cast("ReadVICmpV_" # mx); + defvar ReadVICmpX_MX = !cast("ReadVICmpX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + } } multiclass VPseudoVMUL_VV_VX { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>; + foreach m = MxList 
in { + defvar mx = m.MX; + defvar WriteVIMulV_MX = !cast("WriteVIMulV_" # mx); + defvar WriteVIMulX_MX = !cast("WriteVIMulX_" # mx); + defvar ReadVIMulV_MX = !cast("ReadVIMulV_" # mx); + defvar ReadVIMulX_MX = !cast("ReadVIMulX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVIMulV_MX, ReadVIMulV_MX, ReadVIMulV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIMulX_MX, ReadVIMulV_MX, ReadVIMulX_MX, ReadVMask]>; + } } multiclass VPseudoVDIV_VV_VX { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIDivV_MX = !cast("WriteVIDivV_" # mx); + defvar WriteVIDivX_MX = !cast("WriteVIDivX_" # mx); + defvar ReadVIDivV_MX = !cast("ReadVIDivV_" # mx); + defvar ReadVIDivX_MX = !cast("ReadVIDivX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVIDivV_MX, ReadVIDivV_MX, ReadVIDivV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIDivX_MX, ReadVIDivV_MX, ReadVIDivX_MX, ReadVMask]>; + } } multiclass VPseudoVFMUL_VV_VF { @@ -2397,10 +2494,18 @@ multiclass VPseudoVFRDIV_VF { } multiclass VPseudoVALU_VV_VX { - defm "" : VPseudoBinaryV_VV, - Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIALUV_MX = !cast("WriteVIALUV_" # mx); + defvar WriteVIALUX_MX = !cast("WriteVIALUX_" # mx); + defvar ReadVIALUV_MX = !cast("ReadVIALUV_" # mx); + defvar ReadVIALUX_MX = !cast("ReadVIALUX_" # mx); + + defm "" : VPseudoBinaryV_VV_LMUL, + Sched<[WriteVIALUV_MX, ReadVIALUV_MX, ReadVIALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + } } multiclass VPseudoVSGNJ_VV_VF { @@ -2430,24 +2535,48 @@ multiclass VPseudoVALU_VF { } 
multiclass VPseudoVALU_VX_VI { - defm "" : VPseudoBinaryV_VX, - Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; - defm "" : VPseudoBinaryV_VI, - Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIALUX_MX = !cast("WriteVIALUX_" # mx); + defvar WriteVIALUI_MX = !cast("WriteVIALUI_" # mx); + defvar ReadVIALUV_MX = !cast("ReadVIALUV_" # mx); + defvar ReadVIALUX_MX = !cast("ReadVIALUX_" # mx); + + defm "" : VPseudoBinaryV_VX_LMUL, + Sched<[WriteVIALUX_MX, ReadVIALUV_MX, ReadVIALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI_LMUL, + Sched<[WriteVIALUI_MX, ReadVIALUV_MX, ReadVMask]>; + } } multiclass VPseudoVWALU_VV_VX { - defm "" : VPseudoBinaryW_VV, - Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; - defm "" : VPseudoBinaryW_VX, - Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWALUV_MX = !cast("WriteVIWALUV_" # mx); + defvar WriteVIWALUX_MX = !cast("WriteVIWALUX_" # mx); + defvar ReadVIWALUV_MX = !cast("ReadVIWALUV_" # mx); + defvar ReadVIWALUX_MX = !cast("ReadVIWALUX_" # mx); + + defm "" : VPseudoBinaryW_VV_LMUL, + Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryW_VX_LMUL, + Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>; + } } multiclass VPseudoVWMUL_VV_VX { - defm "" : VPseudoBinaryW_VV, - Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>; - defm "" : VPseudoBinaryW_VX, - Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWMulV_MX = !cast("WriteVIWMulV_" # mx); + defvar WriteVIWMulX_MX = !cast("WriteVIWMulX_" # mx); + defvar ReadVIWMulV_MX = !cast("ReadVIWMulV_" # mx); + defvar ReadVIWMulX_MX = !cast("ReadVIWMulX_" # mx); + + defm "" : VPseudoBinaryW_VV_LMUL, + Sched<[WriteVIWMulV_MX, ReadVIWMulV_MX, ReadVIWMulV_MX, ReadVMask]>; + defm "" : VPseudoBinaryW_VX_LMUL, + 
Sched<[WriteVIWMulX_MX, ReadVIWMulV_MX, ReadVIWMulX_MX, ReadVMask]>; + } } multiclass VPseudoVWMUL_VV_VF { @@ -2458,10 +2587,18 @@ multiclass VPseudoVWMUL_VV_VF { } multiclass VPseudoVWALU_WV_WX { - defm "" : VPseudoBinaryW_WV, - Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>; - defm "" : VPseudoBinaryW_WX, - Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWALUV_MX = !cast("WriteVIWALUV_" # mx); + defvar WriteVIWALUX_MX = !cast("WriteVIWALUX_" # mx); + defvar ReadVIWALUV_MX = !cast("ReadVIWALUV_" # mx); + defvar ReadVIWALUX_MX = !cast("ReadVIWALUX_" # mx); + + defm "" : VPseudoBinaryW_WV_LMUL, + Sched<[WriteVIWALUV_MX, ReadVIWALUV_MX, ReadVIWALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryW_WX_LMUL, + Sched<[WriteVIWALUX_MX, ReadVIWALUV_MX, ReadVIWALUX_MX, ReadVMask]>; + } } multiclass VPseudoVFWALU_VV_VF { @@ -2479,79 +2616,139 @@ multiclass VPseudoVFWALU_WV_WF { } multiclass VPseudoVMRG_VM_XM_IM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM, - Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; - // Tied versions to allow codegen control over the tail elements - defm "" : VPseudoTiedBinaryV_VM, - Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_XM, - Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_IM, - Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIMergeV_MX = !cast("WriteVIMergeV_" # mx); + defvar WriteVIMergeX_MX = !cast("WriteVIMergeX_" # mx); + defvar WriteVIMergeI_MX = !cast("WriteVIMergeI_" # mx); + defvar ReadVIMergeV_MX = !cast("ReadVIMergeV_" # mx); + defvar ReadVIMergeX_MX = !cast("ReadVIMergeX_" # mx); + + defm "" : VPseudoBinaryV_VM, + 
Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_IM, + Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_IM, + Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>; + } } multiclass VPseudoVCALU_VM_XM_IM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM, - Sched<[WriteVICALUI, ReadVICALUV, ReadVMask]>; - // Tied versions to allow codegen control over the tail elements - defm "" : VPseudoTiedBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_IM, - Sched<[WriteVICALUI, ReadVICALUV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar WriteVICALUI_MX = !cast("WriteVICALUI_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_IM, + Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : 
VPseudoTiedBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_IM, + Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>; + } } multiclass VPseudoVCALU_VM_XM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; - // Tied versions to allow codegen control over the tail elements - defm "" : VPseudoTiedBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoTiedBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + // Tied versions to allow codegen control over the tail elements + defm "" : VPseudoTiedBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoTiedBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + } } multiclass VPseudoVCALUM_VM_XM_IM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; - defm "" : VPseudoBinaryV_IM, - Sched<[WriteVICALUI, ReadVICALUV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + 
defvar WriteVICALUI_MX = !cast("WriteVICALUI_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_IM, + Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>; + } } multiclass VPseudoVCALUM_VM_XM { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV, ReadVMask]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>; + } } multiclass VPseudoVCALUM_V_X_I { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX]>; - defm "" : VPseudoBinaryV_IM, - Sched<[WriteVICALUI, ReadVICALUV]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar WriteVICALUI_MX = !cast("WriteVICALUI_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>; + defm "" : VPseudoBinaryV_IM, + Sched<[WriteVICALUI_MX, 
ReadVICALUV_MX]>; + } } multiclass VPseudoVCALUM_V_X { - defm "" : VPseudoBinaryV_VM, - Sched<[WriteVICALUV, ReadVICALUV, ReadVICALUV]>; - defm "" : VPseudoBinaryV_XM, - Sched<[WriteVICALUX, ReadVICALUV, ReadVICALUX]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICALUV_MX = !cast("WriteVICALUV_" # mx); + defvar WriteVICALUX_MX = !cast("WriteVICALUX_" # mx); + defvar ReadVICALUV_MX = !cast("ReadVICALUV_" # mx); + defvar ReadVICALUX_MX = !cast("ReadVICALUX_" # mx); + + defm "" : VPseudoBinaryV_VM, + Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>; + defm "" : VPseudoBinaryV_XM, + Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>; + } } multiclass VPseudoVNCLP_WV_WX_WI { @@ -2564,12 +2761,21 @@ multiclass VPseudoVNCLP_WV_WX_WI { } multiclass VPseudoVNSHT_WV_WX_WI { - defm "" : VPseudoBinaryV_WV, - Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>; - defm "" : VPseudoBinaryV_WX, - Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>; - defm "" : VPseudoBinaryV_WI, - Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVNShiftV_MX = !cast("WriteVNShiftV_" # mx); + defvar WriteVNShiftX_MX = !cast("WriteVNShiftX_" # mx); + defvar WriteVNShiftI_MX = !cast("WriteVNShiftI_" # mx); + defvar ReadVNShiftV_MX = !cast("ReadVNShiftV_" # mx); + defvar ReadVNShiftX_MX = !cast("ReadVNShiftX_" # mx); + + defm "" : VPseudoBinaryV_WV_LMUL, + Sched<[WriteVNShiftV_MX, ReadVNShiftV_MX, ReadVNShiftV_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_WX_LMUL, + Sched<[WriteVNShiftX_MX, ReadVNShiftV_MX, ReadVNShiftX_MX, ReadVMask]>; + defm "" : VPseudoBinaryV_WI_LMUL, + Sched<[WriteVNShiftI_MX, ReadVNShiftV_MX, ReadVMask]>; + } } multiclass VPseudoTernary { + defm _VV : VPseudoTernaryWithPolicy; +} + multiclass VPseudoVSLDV_VX { foreach m = MxList in defm _VX : VPseudoTernaryWithPolicy; } -multiclass VPseudoTernaryV_VX_AAXA { - foreach m = MxList in - defm "_VX" : VPseudoTernaryWithPolicy; 
+multiclass VPseudoTernaryV_VX_AAXA { + defm "_VX" : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryV_VF_AAXA { @@ -2643,11 +2853,16 @@ multiclass VPseudoTernaryW_VV mxlist = MxListW> { constraint>; } -multiclass VPseudoTernaryW_VX { +multiclass VPseudoTernaryW_VV_LMUL { defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in - defm "_VX" : VPseudoTernaryWithPolicy; + defm _VV : VPseudoTernaryWithPolicy; +} + +multiclass VPseudoTernaryW_VX { + defvar constraint = "@earlyclobber $rd"; + defm "_VX" : VPseudoTernaryWithPolicy; } multiclass VPseudoTernaryW_VF { @@ -2664,10 +2879,20 @@ multiclass VPseudoVSLDV_VI { } multiclass VPseudoVMAC_VV_VX_AAXA { - defm "" : VPseudoTernaryV_VV_AAXA, - Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>; - defm "" : VPseudoTernaryV_VX_AAXA, - Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVIMulAddV_MX = !cast("WriteVIMulAddV_" # mx); + defvar WriteVIMulAddX_MX = !cast("WriteVIMulAddX_" # mx); + defvar ReadVIMulAddV_MX = !cast("ReadVIMulAddV_" # mx); + defvar ReadVIMulAddX_MX = !cast("ReadVIMulAddX_" # mx); + + defm "" : VPseudoTernaryV_VV_AAXA_LMUL, + Sched<[WriteVIMulAddV_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX, + ReadVIMulAddV_MX, ReadVMask]>; + defm "" : VPseudoTernaryV_VX_AAXA, + Sched<[WriteVIMulAddX_MX, ReadVIMulAddV_MX, ReadVIMulAddV_MX, + ReadVIMulAddX_MX, ReadVMask]>; + } } multiclass VPseudoVMAC_VV_VF_AAXA { @@ -2685,15 +2910,33 @@ multiclass VPseudoVSLD_VX_VI { } multiclass VPseudoVWMAC_VV_VX { - defm "" : VPseudoTernaryW_VV, - Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>; - defm "" : VPseudoTernaryW_VX, - Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWMulAddV_MX = !cast("WriteVIWMulAddV_" # mx); + defvar WriteVIWMulAddX_MX = 
!cast("WriteVIWMulAddX_" # mx); + defvar ReadVIWMulAddV_MX = !cast("ReadVIWMulAddV_" # mx); + defvar ReadVIWMulAddX_MX = !cast("ReadVIWMulAddX_" # mx); + + defm "" : VPseudoTernaryW_VV_LMUL, + Sched<[WriteVIWMulAddV_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, + ReadVIWMulAddV_MX, ReadVMask]>; + defm "" : VPseudoTernaryW_VX, + Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, + ReadVIWMulAddX_MX, ReadVMask]>; + } } multiclass VPseudoVWMAC_VX { - defm "" : VPseudoTernaryW_VX, - Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>; + foreach m = MxListW in { + defvar mx = m.MX; + defvar WriteVIWMulAddX_MX = !cast("WriteVIWMulAddX_" # mx); + defvar ReadVIWMulAddV_MX= !cast("ReadVIWMulAddV_" # mx); + defvar ReadVIWMulAddX_MX = !cast("ReadVIWMulAddX_" # mx); + + defm "" : VPseudoTernaryW_VX, + Sched<[WriteVIWMulAddX_MX, ReadVIWMulAddV_MX, ReadVIWMulAddV_MX, + ReadVIWMulAddX_MX, ReadVMask]>; + } } multiclass VPseudoVWMAC_VV_VF { @@ -2704,19 +2947,36 @@ multiclass VPseudoVWMAC_VV_VF { } multiclass VPseudoVCMPM_VV_VX_VI { - defm "" : VPseudoBinaryM_VV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; - defm "" : VPseudoBinaryM_VX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; - defm "" : VPseudoBinaryM_VI, - Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICmpV_MX = !cast("WriteVICmpV_" # mx); + defvar WriteVICmpX_MX = !cast("WriteVICmpX_" # mx); + defvar WriteVICmpI_MX = !cast("WriteVICmpI_" # mx); + defvar ReadVICmpV_MX = !cast("ReadVICmpV_" # mx); + defvar ReadVICmpX_MX = !cast("ReadVICmpX_" # mx); + + defm "" : VPseudoBinaryM_VV_LMUL, + Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VX, + Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VI, + Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>; + } } multiclass VPseudoVCMPM_VV_VX { - defm "" : 
VPseudoBinaryM_VV, - Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; - defm "" : VPseudoBinaryM_VX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICmpV_MX = !cast("WriteVICmpV_" # mx); + defvar WriteVICmpX_MX = !cast("WriteVICmpX_" # mx); + defvar ReadVICmpV_MX = !cast("ReadVICmpV_" # mx); + defvar ReadVICmpX_MX = !cast("ReadVICmpX_" # mx); + + defm "" : VPseudoBinaryM_VV_LMUL, + Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VX, + Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + } } multiclass VPseudoVCMPM_VV_VF { @@ -2732,10 +2992,18 @@ multiclass VPseudoVCMPM_VF { } multiclass VPseudoVCMPM_VX_VI { - defm "" : VPseudoBinaryM_VX, - Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; - defm "" : VPseudoBinaryM_VI, - Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>; + foreach m = MxList in { + defvar mx = m.MX; + defvar WriteVICmpX_MX = !cast("WriteVICmpX_" # mx); + defvar WriteVICmpI_MX = !cast("WriteVICmpI_" # mx); + defvar ReadVICmpV_MX = !cast("ReadVICmpV_" # mx); + defvar ReadVICmpX_MX = !cast("ReadVICmpX_" # mx); + + defm "" : VPseudoBinaryM_VX, + Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>; + defm "" : VPseudoBinaryM_VI, + Sched<[WriteVICmpI_MX, ReadVICmpV_MX, ReadVMask]>; + } } multiclass VPseudoVRED_VS { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 50ecd8b8da998..61476dd43d296 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -101,12 +101,6 @@ def BSETINVTwoBitsMask : PatLeaf<(imm), [{ return countPopulation(N->getZExtValue()) == 2; }]>; -def TrailingZerosXForm : SDNodeXFormgetZExtValue(); - return CurDAG->getTargetConstant(countTrailingZeros(I), SDLoc(N), - N->getValueType(0)); -}]>; - def BSETINVTwoBitsMaskHigh : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(63 
- countLeadingZeros(I), SDLoc(N), @@ -248,6 +242,16 @@ def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ return !C || !isInt<12>(C->getSExtValue()); }]>; +def Shifted32OnesMask : PatLeaf<(imm), [{ + uint64_t Imm = N->getZExtValue(); + if (!isShiftedMask_64(Imm)) + return false; + + unsigned TrailingZeros = countTrailingZeros(Imm); + return TrailingZeros > 0 && TrailingZeros < 32 && + Imm == UINT64_C(0xFFFFFFFF) << TrailingZeros; +}], TrailingZeros>; + def sh1add_op : ComplexPattern; def sh2add_op : ComplexPattern; def sh3add_op : ComplexPattern; @@ -562,10 +566,10 @@ def : Pat<(seteq (and GPR:$rs1, SingleBitSetMask:$mask), 0), (BEXTI (XORI GPR:$rs1, -1), SingleBitSetMask:$mask)>; def : Pat<(or GPR:$r, BSETINVTwoBitsMask:$i), - (BSETI (BSETI GPR:$r, (TrailingZerosXForm BSETINVTwoBitsMask:$i)), + (BSETI (BSETI GPR:$r, (TrailingZeros BSETINVTwoBitsMask:$i)), (BSETINVTwoBitsMaskHigh BSETINVTwoBitsMask:$i))>; def : Pat<(xor GPR:$r, BSETINVTwoBitsMask:$i), - (BINVI (BINVI GPR:$r, (TrailingZerosXForm BSETINVTwoBitsMask:$i)), + (BINVI (BINVI GPR:$r, (TrailingZeros BSETINVTwoBitsMask:$i)), (BSETINVTwoBitsMaskHigh BSETINVTwoBitsMask:$i))>; def : Pat<(or GPR:$r, BSETINVORIMask:$i), (BSETI (ORI GPR:$r, (BSETINVORIMaskLow BSETINVORIMask:$i)), @@ -702,13 +706,13 @@ def : Pat<(add GPR:$r, CSImm12MulBy8:$i), def : Pat<(mul GPR:$r, C3LeftShift:$i), (SLLI (SH1ADD GPR:$r, GPR:$r), - (TrailingZerosXForm C3LeftShift:$i))>; + (TrailingZeros C3LeftShift:$i))>; def : Pat<(mul GPR:$r, C5LeftShift:$i), (SLLI (SH2ADD GPR:$r, GPR:$r), - (TrailingZerosXForm C5LeftShift:$i))>; + (TrailingZeros C5LeftShift:$i))>; def : Pat<(mul GPR:$r, C9LeftShift:$i), (SLLI (SH3ADD GPR:$r, GPR:$r), - (TrailingZerosXForm C9LeftShift:$i))>; + (TrailingZeros C9LeftShift:$i))>; def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)), (SH1ADD (SH2ADD GPR:$r, GPR:$r), GPR:$r)>; @@ -737,6 +741,12 @@ def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)), let Predicates = [HasStdExtZba, IsRV64] in { def : Pat<(i64 (shl (and GPR:$rs1, 
0xFFFFFFFF), uimm5:$shamt)), (SLLI_UW GPR:$rs1, uimm5:$shamt)>; +// Match a shifted 0xffffffff mask. Use SRLI to clear the LSBs and SLLI_UW to +// mask and shift. +def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)), + (SLLI_UW (SRLI GPR:$rs1, Shifted32OnesMask:$mask), + Shifted32OnesMask:$mask)>; + def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)), (ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, X0)>; @@ -771,14 +781,14 @@ def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)), (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>; def : Pat<(mul (binop_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i), - (SH1ADD (SLLI_UW GPR:$r, (TrailingZerosXForm C3LeftShiftUW:$i)), - (SLLI_UW GPR:$r, (TrailingZerosXForm C3LeftShiftUW:$i)))>; + (SH1ADD (SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i)), + (SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i)))>; def : Pat<(mul (binop_oneuse GPR:$r, 0xFFFFFFFF), C5LeftShiftUW:$i), - (SH2ADD (SLLI_UW GPR:$r, (TrailingZerosXForm C5LeftShiftUW:$i)), - (SLLI_UW GPR:$r, (TrailingZerosXForm C5LeftShiftUW:$i)))>; + (SH2ADD (SLLI_UW GPR:$r, (TrailingZeros C5LeftShiftUW:$i)), + (SLLI_UW GPR:$r, (TrailingZeros C5LeftShiftUW:$i)))>; def : Pat<(mul (binop_oneuse GPR:$r, 0xFFFFFFFF), C9LeftShiftUW:$i), - (SH3ADD (SLLI_UW GPR:$r, (TrailingZerosXForm C9LeftShiftUW:$i)), - (SLLI_UW GPR:$r, (TrailingZerosXForm C9LeftShiftUW:$i)))>; + (SH3ADD (SLLI_UW GPR:$r, (TrailingZeros C9LeftShiftUW:$i)), + (SLLI_UW GPR:$r, (TrailingZeros C9LeftShiftUW:$i)))>; } // Predicates = [HasStdExtZba, IsRV64] let Predicates = [HasStdExtZbcOrZbkc] in { diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index 5e0613f1e2ab4..14d7afb512e1a 100644 --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -73,8 +73,7 @@ static void addUses(const MachineInstr &MI, // returns true if all uses of OrigMI only depend on the lower 
word of its // output, so we can transform OrigMI to the corresponding W-version. // TODO: handle multiple interdependent transformations -static bool isAllUsesReadW(const MachineInstr &OrigMI, - MachineRegisterInfo &MRI) { +static bool hasAllWUsers(const MachineInstr &OrigMI, MachineRegisterInfo &MRI) { SmallPtrSet Visited; SmallVector Worklist; @@ -118,10 +117,17 @@ static bool isAllUsesReadW(const MachineInstr &OrigMI, case RISCV::CTZW: case RISCV::CPOPW: case RISCV::SLLI_UW: + case RISCV::FMV_H_X: + case RISCV::FMV_W_X: + case RISCV::FCVT_H_W: + case RISCV::FCVT_H_WU: case RISCV::FCVT_S_W: case RISCV::FCVT_S_WU: case RISCV::FCVT_D_W: case RISCV::FCVT_D_WU: + case RISCV::SEXT_B: + case RISCV::SEXT_H: + case RISCV::ZEXT_H_RV64: continue; // these overwrite higher input bits, otherwise the lower word of output @@ -167,8 +173,6 @@ static bool isAllUsesReadW(const MachineInstr &OrigMI, case RISCV::CLMUL: case RISCV::ORC_B: case RISCV::ORN: - case RISCV::SEXT_B: - case RISCV::SEXT_H: case RISCV::SH1ADD: case RISCV::SH1ADD_UW: case RISCV::SH2ADD: @@ -176,7 +180,6 @@ static bool isAllUsesReadW(const MachineInstr &OrigMI, case RISCV::SH3ADD: case RISCV::SH3ADD_UW: case RISCV::XNOR: - case RISCV::ZEXT_H_RV64: addUses(*MI, Worklist, MRI); continue; default: @@ -254,7 +257,7 @@ static bool isSignExtendingOpW(MachineInstr &MI, MachineRegisterInfo &MRI, case RISCV::ADDI: if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == RISCV::X0) return true; - if (isAllUsesReadW(MI, MRI)) { + if (hasAllWUsers(MI, MRI)) { // transform to ADDIW FixableDef.insert(&MI); return true; @@ -282,10 +285,11 @@ static bool isSignExtendingOpW(MachineInstr &MI, MachineRegisterInfo &MRI, case RISCV::LWU: case RISCV::MUL: case RISCV::SUB: - if (isAllUsesReadW(MI, MRI)) { + if (hasAllWUsers(MI, MRI)) { FixableDef.insert(&MI); return true; } + break; } return false; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index 3c5b48803f4c4..013b9bd61455a 
100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -9,6 +9,41 @@ //===----------------------------------------------------------------------===// /// Define scheduler resources associated with def operands. +defvar SchedMxList = ["UpperBound", "M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"]; +// Used for widening and narrowing instructions as it doesn't contain M8. +defvar SchedMxListW = ["UpperBound", "MF8", "MF4", "MF2", "M1", "M2", "M4"]; + +// Creates SchedWrite for each (name, LMUL) pair for LMUL in lmuls argument +multiclass LMULSchedWrites lmuls = SchedMxList> { + foreach mx = lmuls in { + def name # "_" # mx : SchedWrite; + } +} + +// Creates SchedRead for each (name, LMUL) pair for LMUL in lmuls argument +multiclass LMULSchedReads lmuls = SchedMxList> { + foreach mx = lmuls in { + def name # "_" # mx : SchedRead; + } +} + +// Creates WriteRes for each (name, LMUL, resources) tuple for LMUL +// in lmuls argument +multiclass LMULWriteRes resources, + list lmuls = SchedMxList> { + foreach mx = lmuls in { + def : WriteRes(name # "_" # mx), resources>; + } +} + +// Creates ReadAdvance for each (name, LMUL, val) tuple for LMUL +// in lmuls argument +multiclass LMULReadAdvance lmuls = SchedMxList> { + foreach mx = lmuls in { + def : ReadAdvance(name # "_" # mx), val>; + } +} + // 3.6 Vector Byte Length vlenb def WriteRdVLENB : SchedWrite; @@ -79,55 +114,55 @@ def WriteVST8R : SchedWrite; // 11. Vector Integer Arithmetic Instructions // 11.1. Vector Single-Width Integer Add and Subtract // 11.5. Vector Bitwise Logical Instructions -def WriteVIALUV : SchedWrite; -def WriteVIALUX : SchedWrite; -def WriteVIALUI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIALUV">; +defm "" : LMULSchedWrites<"WriteVIALUX">; +defm "" : LMULSchedWrites<"WriteVIALUI">; // 11.2. 
Vector Widening Integer Add/Subtract -def WriteVIWALUV : SchedWrite; -def WriteVIWALUX : SchedWrite; -def WriteVIWALUI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIWALUV", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVIWALUX", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVIWALUI", SchedMxListW>; // 11.3. Vector Integer Extension -def WriteVExtV : SchedWrite; +defm "" : LMULSchedWrites<"WriteVExtV">; // 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions -def WriteVICALUV : SchedWrite; -def WriteVICALUX : SchedWrite; -def WriteVICALUI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVICALUV">; +defm "" : LMULSchedWrites<"WriteVICALUX">; +defm "" : LMULSchedWrites<"WriteVICALUI">; // 11.6. Vector Single-Width Bit Shift Instructions -def WriteVShiftV : SchedWrite; -def WriteVShiftX : SchedWrite; -def WriteVShiftI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVShiftV">; +defm "" : LMULSchedWrites<"WriteVShiftX">; +defm "" : LMULSchedWrites<"WriteVShiftI">; // 11.7. Vector Narrowing Integer Right Shift Instructions -def WriteVNShiftV : SchedWrite; -def WriteVNShiftX : SchedWrite; -def WriteVNShiftI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVNShiftV", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVNShiftX", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVNShiftI", SchedMxListW>; // 11.8. Vector Integer Comparison Instructions // 11.9. Vector Integer Min/Max Instructions -def WriteVICmpV : SchedWrite; -def WriteVICmpX : SchedWrite; -def WriteVICmpI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVICmpV">; +defm "" : LMULSchedWrites<"WriteVICmpX">; +defm "" : LMULSchedWrites<"WriteVICmpI">; // 11.10. Vector Single-Width Integer Multiply Instructions -def WriteVIMulV : SchedWrite; -def WriteVIMulX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIMulV">; +defm "" : LMULSchedWrites<"WriteVIMulX">; // 11.11. 
Vector Integer Divide Instructions -def WriteVIDivV : SchedWrite; -def WriteVIDivX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIDivV">; +defm "" : LMULSchedWrites<"WriteVIDivX">; // 11.12. Vector Widening Integer Multiply Instructions -def WriteVIWMulV : SchedWrite; -def WriteVIWMulX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIWMulV", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVIWMulX", SchedMxListW>; // 11.13. Vector Single-Width Integer Multiply-Add Instructions -def WriteVIMulAddV : SchedWrite; -def WriteVIMulAddX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIMulAddV">; +defm "" : LMULSchedWrites<"WriteVIMulAddX">; // 11.14. Vector Widening Integer Multiply-Add Instructions -def WriteVIWMulAddV : SchedWrite; -def WriteVIWMulAddX : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIWMulAddV", SchedMxListW>; +defm "" : LMULSchedWrites<"WriteVIWMulAddX", SchedMxListW>; // 11.15. Vector Integer Merge Instructions -def WriteVIMergeV : SchedWrite; -def WriteVIMergeX : SchedWrite; -def WriteVIMergeI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIMergeV">; +defm "" : LMULSchedWrites<"WriteVIMergeX">; +defm "" : LMULSchedWrites<"WriteVIMergeI">; // 11.16. Vector Integer Move Instructions -def WriteVIMovV : SchedWrite; -def WriteVIMovX : SchedWrite; -def WriteVIMovI : SchedWrite; +defm "" : LMULSchedWrites<"WriteVIMovV">; +defm "" : LMULSchedWrites<"WriteVIMovX">; +defm "" : LMULSchedWrites<"WriteVIMovI">; // 12. Vector Fixed-Point Arithmetic Instructions // 12.1. Vector Single-Width Saturating Add and Subtract @@ -303,47 +338,47 @@ def ReadVST8R : SchedRead; // 11. Vector Integer Arithmetic Instructions // 11.1. Vector Single-Width Integer Add and Subtract // 11.5. Vector Bitwise Logical Instructions -def ReadVIALUV : SchedRead; -def ReadVIALUX : SchedRead; +defm "" : LMULSchedReads<"ReadVIALUV">; +defm "" : LMULSchedReads<"ReadVIALUX">; // 11.2. 
Vector Widening Integer Add/Subtract -def ReadVIWALUV : SchedRead; -def ReadVIWALUX : SchedRead; +defm "" : LMULSchedReads<"ReadVIWALUV", SchedMxListW>; +defm "" : LMULSchedReads<"ReadVIWALUX", SchedMxListW>; // 11.3. Vector Integer Extension -def ReadVExtV : SchedRead; +defm "" : LMULSchedReads<"ReadVExtV">; // 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions -def ReadVICALUV : SchedRead; -def ReadVICALUX : SchedRead; +defm "" : LMULSchedReads<"ReadVICALUV">; +defm "" : LMULSchedReads<"ReadVICALUX">; // 11.6. Vector Single-Width Bit Shift Instructions -def ReadVShiftV : SchedRead; -def ReadVShiftX : SchedRead; +defm "" : LMULSchedReads<"ReadVShiftV">; +defm "" : LMULSchedReads<"ReadVShiftX">; // 11.7. Vector Narrowing Integer Right Shift Instructions -def ReadVNShiftV : SchedRead; -def ReadVNShiftX : SchedRead; +defm "" : LMULSchedReads<"ReadVNShiftV", SchedMxListW>; +defm "" : LMULSchedReads<"ReadVNShiftX", SchedMxListW>; // 11.8. Vector Integer Comparison Instructions // 11.9. Vector Integer Min/Max Instructions -def ReadVICmpV : SchedRead; -def ReadVICmpX : SchedRead; +defm "" : LMULSchedReads<"ReadVICmpV">; +defm "" : LMULSchedReads<"ReadVICmpX">; // 11.10. Vector Single-Width Integer Multiply Instructions -def ReadVIMulV : SchedRead; -def ReadVIMulX : SchedRead; +defm "" : LMULSchedReads<"ReadVIMulV">; +defm "" : LMULSchedReads<"ReadVIMulX">; // 11.11. Vector Integer Divide Instructions -def ReadVIDivV : SchedRead; -def ReadVIDivX : SchedRead; +defm "" : LMULSchedReads<"ReadVIDivV">; +defm "" : LMULSchedReads<"ReadVIDivX">; // 11.12. Vector Widening Integer Multiply Instructions -def ReadVIWMulV : SchedRead; -def ReadVIWMulX : SchedRead; +defm "" : LMULSchedReads<"ReadVIWMulV", SchedMxListW>; +defm "" : LMULSchedReads<"ReadVIWMulX", SchedMxListW>; // 11.13. 
Vector Single-Width Integer Multiply-Add Instructions -def ReadVIMulAddV : SchedRead; -def ReadVIMulAddX : SchedRead; +defm "" : LMULSchedReads<"ReadVIMulAddV">; +defm "" : LMULSchedReads<"ReadVIMulAddX">; // 11.14. Vector Widening Integer Multiply-Add Instructions -def ReadVIWMulAddV : SchedRead; -def ReadVIWMulAddX : SchedRead; +defm "" : LMULSchedReads<"ReadVIWMulAddV", SchedMxListW>; +defm "" : LMULSchedReads<"ReadVIWMulAddX", SchedMxListW>; // 11.15. Vector Integer Merge Instructions -def ReadVIMergeV : SchedRead; -def ReadVIMergeX : SchedRead; +defm "" : LMULSchedReads<"ReadVIMergeV">; +defm "" : LMULSchedReads<"ReadVIMergeX">; // 11.16. Vector Integer Move Instructions -def ReadVIMovV : SchedRead; -def ReadVIMovX : SchedRead; +defm "" : LMULSchedReads<"ReadVIMovV">; +defm "" : LMULSchedReads<"ReadVIMovX">; // 12. Vector Fixed-Point Arithmetic Instructions // 12.1. Vector Single-Width Saturating Add and Subtract @@ -541,42 +576,42 @@ foreach nf=2-8 in { } } -// 12. Vector Integer Arithmetic Instructions -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; -def : WriteRes; +// 11. 
Vector Integer Arithmetic Instructions +defm "" : LMULWriteRes<"WriteVIALUV", []>; +defm "" : LMULWriteRes<"WriteVIALUX", []>; +defm "" : LMULWriteRes<"WriteVIALUI", []>; +defm "" : LMULWriteRes<"WriteVIWALUV", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIWALUX", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIWALUI", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVExtV", []>; +defm "" : LMULWriteRes<"WriteVICALUV", []>; +defm "" : LMULWriteRes<"WriteVICALUX", []>; +defm "" : LMULWriteRes<"WriteVICALUI", []>; +defm "" : LMULWriteRes<"WriteVShiftV", []>; +defm "" : LMULWriteRes<"WriteVShiftX", []>; +defm "" : LMULWriteRes<"WriteVShiftI", []>; +defm "" : LMULWriteRes<"WriteVNShiftV", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVNShiftX", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVNShiftI", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVICmpV", []>; +defm "" : LMULWriteRes<"WriteVICmpX", []>; +defm "" : LMULWriteRes<"WriteVICmpI", []>; +defm "" : LMULWriteRes<"WriteVIMulV", []>; +defm "" : LMULWriteRes<"WriteVIMulX", []>; +defm "" : LMULWriteRes<"WriteVIDivV", []>; +defm "" : LMULWriteRes<"WriteVIDivX", []>; +defm "" : LMULWriteRes<"WriteVIWMulV", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIWMulX", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIMulAddV", []>; +defm "" : LMULWriteRes<"WriteVIMulAddX", []>; +defm "" : LMULWriteRes<"WriteVIWMulAddV", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIWMulAddX", [], SchedMxListW>; +defm "" : LMULWriteRes<"WriteVIMergeV", []>; +defm "" : LMULWriteRes<"WriteVIMergeX", []>; +defm "" : LMULWriteRes<"WriteVIMergeI", []>; +defm "" : LMULWriteRes<"WriteVIMovV", []>; +defm "" : LMULWriteRes<"WriteVIMovX", []>; +defm "" : LMULWriteRes<"WriteVIMovI", []>; // 13. Vector Fixed-Point Arithmetic Instructions def : WriteRes; @@ -700,34 +735,34 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; -// 12. 
Vector Integer Arithmetic Instructions -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; +// 11. Vector Integer Arithmetic Instructions +defm "" : LMULReadAdvance<"ReadVIALUV", 0>; +defm "" : LMULReadAdvance<"ReadVIALUX", 0>; +defm "" : LMULReadAdvance<"ReadVIWALUV", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIWALUX", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVExtV", 0>; +defm "" : LMULReadAdvance<"ReadVICALUV", 0>; +defm "" : LMULReadAdvance<"ReadVICALUX", 0>; +defm "" : LMULReadAdvance<"ReadVShiftV", 0>; +defm "" : LMULReadAdvance<"ReadVShiftX", 0>; +defm "" : LMULReadAdvance<"ReadVNShiftV", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVNShiftX", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVICmpV", 0>; +defm "" : LMULReadAdvance<"ReadVICmpX", 0>; +defm "" : LMULReadAdvance<"ReadVIMulV", 0>; +defm "" : LMULReadAdvance<"ReadVIMulX", 0>; +defm "" : LMULReadAdvance<"ReadVIDivV", 0>; +defm "" : LMULReadAdvance<"ReadVIDivX", 0>; +defm "" : LMULReadAdvance<"ReadVIWMulV", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIWMulX", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIMulAddV", 0>; +defm "" : LMULReadAdvance<"ReadVIMulAddX", 0>; +defm "" : LMULReadAdvance<"ReadVIWMulAddV", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIWMulAddX", 0, SchedMxListW>; +defm "" : LMULReadAdvance<"ReadVIMergeV", 0>; +defm "" : LMULReadAdvance<"ReadVIMergeX", 0>; +defm "" : LMULReadAdvance<"ReadVIMovV", 0>; +defm "" : LMULReadAdvance<"ReadVIMovX", 0>; // 13. 
Vector Fixed-Point Arithmetic Instructions def : ReadAdvance; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index f89db1202a674..afb21b868cecf 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -153,7 +153,9 @@ Optional RISCVTTIImpl::getMaxVScale() const { Optional RISCVTTIImpl::getVScaleForTuning() const { if (ST->hasVInstructions()) - return ST->getRealMinVLen() / RISCV::RVVBitsPerBlock; + if (unsigned MinVLen = ST->getRealMinVLen(); + MinVLen >= RISCV::RVVBitsPerBlock) + return MinVLen / RISCV::RVVBitsPerBlock; return BaseT::getVScaleForTuning(); } @@ -169,7 +171,10 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0); case TargetTransformInfo::RGK_ScalableVector: return TypeSize::getScalable( - ST->hasVInstructions() ? LMUL * RISCV::RVVBitsPerBlock : 0); + (ST->hasVInstructions() && + ST->getRealMinVLen() >= RISCV::RVVBitsPerBlock) + ? 
LMUL * RISCV::RVVBitsPerBlock + : 0); } llvm_unreachable("Unsupported register kind"); @@ -227,7 +232,8 @@ InstructionCost RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) { - if (!isa(Src)) + if (!isLegalMaskedLoadStore(Src, Alignment) || + CostKind != TTI::TCK_RecipThroughput) return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind); diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index b11c786e7856d..d6688c31334c2 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -84,6 +84,10 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S, return new SparcTargetAsmStreamer(S, OS); } +static MCTargetStreamer *createNullTargetStreamer(MCStreamer &S) { + return new SparcTargetStreamer(S); +} + static MCInstPrinter *createSparcMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, @@ -122,6 +126,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTargetMC() { // Register the asm streamer. TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer); + // Register the null streamer. + TargetRegistry::RegisterNullTargetStreamer(*T, createNullTargetStreamer); + // Register the MCInstPrinter TargetRegistry::RegisterMCInstPrinter(*T, createSparcMCInstPrinter); } diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h index 27976d166067b..ef28afa06bffb 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h @@ -22,9 +22,9 @@ class SparcTargetStreamer : public MCTargetStreamer { public: SparcTargetStreamer(MCStreamer &S); /// Emit ".register , #ignore". 
- virtual void emitSparcRegisterIgnore(unsigned reg) = 0; + virtual void emitSparcRegisterIgnore(unsigned reg){}; /// Emit ".register , #scratch". - virtual void emitSparcRegisterScratch(unsigned reg) = 0; + virtual void emitSparcRegisterScratch(unsigned reg){}; }; // This part is for ascii assembly output diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index b83dcf3a8e656..7531d36a74a64 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -97,7 +97,13 @@ def WebAssemblyInstrInfo : InstrInfo; def : ProcessorModel<"mvp", NoSchedModel, []>; // Generic processor: latest stable version. -def : ProcessorModel<"generic", NoSchedModel, []>; +// +// This includes features that have achieved phase 4 of the standards process, +// and that are expected to work for most users in the current time, with +// consideration given to available support in relevant engines and tools, and +// the importance of the features. +def : ProcessorModel<"generic", NoSchedModel, + [FeatureSignExt, FeatureMutableGlobals]>; // Latest and greatest experimental version of WebAssembly. Bugs included! def : ProcessorModel<"bleeding-edge", NoSchedModel, diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index b137529269bc8..3926c47882bb2 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -1502,6 +1502,10 @@ def : ProcModel<"sapphirerapids", SkylakeServerModel, ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>; def : ProcModel<"alderlake", AlderlakePModel, ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>; +def : ProcModel<"raptorlake", AlderlakePModel, + ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>; +def : ProcModel<"meteorlake", AlderlakePModel, + ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>; // AMD CPUs. 
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 2b823114a0357..b8b214596cd94 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9926,7 +9926,7 @@ multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode128, SDPatternOperator MaskNode128, SDPatternOperator MaskNode256, SDPatternOperator MaskNode512, - X86FoldableSchedWrite sched, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, @@ -9935,25 +9935,25 @@ multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode128, PatFrag mtruncFrag, Predicate prd = HasAVX512>{ let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V128; - defm Z256: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V256; } let Predicates = [prd] in - defm Z: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V512; } multiclass avx512_trunc_qb opc, string OpcodeStr, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode, 
SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode, SDPatternOperator MaskNode, - X86FoldableSchedWrite sched, PatFrag StoreNode, + X86SchedWriteWidths sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode, SDPatternOperator InVecMaskNode> { defm NAME: avx512_trunc opc, string OpcodeStr, SDNode OpNode, } defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", - WriteVPMOV256, truncstorevi8, + SchedWriteVecTruncate, truncstorevi8, masked_truncstorevi8, X86vtrunc, X86vmtrunc>; defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", - WriteVPMOV256, truncstore_s_vi8, + SchedWriteVecTruncate, truncstore_s_vi8, masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>; defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", - WriteVPMOV256, truncstore_us_vi8, + SchedWriteVecTruncate, truncstore_us_vi8, masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc, - WriteVPMOV256, truncstorevi16, + SchedWriteVecTruncate, truncstorevi16, masked_truncstorevi16, X86vtrunc, X86vmtrunc>; defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi16, + SchedWriteVecTruncate, truncstore_s_vi16, masked_truncstore_s_vi16, X86vtruncs, X86vmtruncs>; defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, 
truncstore_us_vi16, masked_truncstore_us_vi16, X86vtruncus, X86vmtruncus>; defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc, - WriteVPMOV256, truncstorevi32, + SchedWriteVecTruncate, truncstorevi32, masked_truncstorevi32, X86vtrunc, X86vmtrunc>; defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi32, + SchedWriteVecTruncate, truncstore_s_vi32, masked_truncstore_s_vi32, X86vtruncs, X86vmtruncs>; defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi32, masked_truncstore_us_vi32, X86vtruncus, X86vmtruncus>; defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc, - WriteVPMOV256, truncstorevi8, + SchedWriteVecTruncate, truncstorevi8, masked_truncstorevi8, X86vtrunc, X86vmtrunc>; defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi8, + SchedWriteVecTruncate, truncstore_s_vi8, masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>; defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi8, masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc, - WriteVPMOV256, truncstorevi16, + SchedWriteVecTruncate, truncstorevi16, masked_truncstorevi16, X86vtrunc, X86vmtrunc>; defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi16, + SchedWriteVecTruncate, truncstore_s_vi16, masked_truncstore_s_vi16, X86vtruncs, X86vmtruncs>; defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi16, masked_truncstore_us_vi16, X86vtruncus, X86vmtruncus>; defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc, - WriteVPMOV256, 
truncstorevi8, + SchedWriteVecTruncate, truncstorevi8, masked_truncstorevi8, X86vtrunc, X86vmtrunc>; defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs, - WriteVPMOV256, truncstore_s_vi8, + SchedWriteVecTruncate, truncstore_s_vi8, masked_truncstore_s_vi8, X86vtruncs, X86vmtruncs>; defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, - select_truncus, WriteVPMOV256, + select_truncus, SchedWriteVecTruncate, truncstore_us_vi8, masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>; diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index ab3abe8faca7c..09e31988e5bd8 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -801,9 +801,9 @@ def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs), return hasNoCarryFlagUses(SDValue(N, 0)); }]>; -let Predicates = [UseIncDec] in { - let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, - SchedRW = [WriteALURMW] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALURMW] in { + let Predicates = [UseIncDec] in { def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>, @@ -816,10 +816,6 @@ let Predicates = [UseIncDec] in { "inc{l}\t$dst", [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>, OpSize32, LOCK; - def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), - "inc{q}\t$dst", - [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>, - LOCK; def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", @@ -833,20 +829,33 @@ let Predicates = [UseIncDec] in { "dec{l}\t$dst", [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>, OpSize32, LOCK; + } + + let Predicates = [UseIncDec, In64BitMode] in { + def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), + "inc{q}\t$dst", + [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>, + LOCK; def 
LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>, LOCK; } +} +let Predicates = [UseIncDec] in { // Additional patterns for -1 constant. def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>; def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>; def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>; - def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>; +} + +let Predicates = [UseIncDec, In64BitMode] in { + // Additional patterns for -1 constant. + def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>; def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>; } diff --git a/llvm/lib/Target/X86/X86InstrExtension.td b/llvm/lib/Target/X86/X86InstrExtension.td index 7a4eb138ec346..8d3fce7f55bc6 100644 --- a/llvm/lib/Target/X86/X86InstrExtension.td +++ b/llvm/lib/Target/X86/X86InstrExtension.td @@ -42,7 +42,7 @@ def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, - TB, OpSize16, Sched<[WriteALULd]>; + TB, OpSize16, Sched<[WriteLoad]>; } // hasSideEffects = 0 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", @@ -51,7 +51,7 @@ def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB, - OpSize32, Sched<[WriteALULd]>; + OpSize32, Sched<[WriteLoad]>; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), 
"movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR16:$src))]>, TB, @@ -59,7 +59,7 @@ def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, - OpSize32, TB, Sched<[WriteALULd]>; + OpSize32, TB, Sched<[WriteLoad]>; let hasSideEffects = 0 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), @@ -68,7 +68,7 @@ def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, - TB, OpSize16, Sched<[WriteALULd]>; + TB, OpSize16, Sched<[WriteLoad]>; } // hasSideEffects = 0 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", @@ -77,7 +77,7 @@ def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB, - OpSize32, Sched<[WriteALULd]>; + OpSize32, Sched<[WriteLoad]>; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zext GR16:$src))]>, TB, @@ -85,7 +85,7 @@ def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, - TB, OpSize32, Sched<[WriteALULd]>; + TB, OpSize32, Sched<[WriteLoad]>; // These instructions exist as a consequence of operand size prefix having // control of the destination size, but not the input size. 
Only support them @@ -100,10 +100,10 @@ def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), let mayLoad = 1 in { def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movs{ww|x}\t{$src, $dst|$dst, $src}", - []>, OpSize16, TB, Sched<[WriteALULd]>, NotMemoryFoldable; + []>, OpSize16, TB, Sched<[WriteLoad]>, NotMemoryFoldable; def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movz{ww|x}\t{$src, $dst|$dst, $src}", - []>, TB, OpSize16, Sched<[WriteALULd]>, NotMemoryFoldable; + []>, TB, OpSize16, Sched<[WriteLoad]>, NotMemoryFoldable; } // mayLoad = 1 } // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 @@ -119,7 +119,7 @@ let mayLoad = 1 in def MOVZX32rm8_NOREX : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - []>, TB, OpSize32, Sched<[WriteALULd]>; + []>, TB, OpSize32, Sched<[WriteLoad]>; def MOVSX32rr8_NOREX : I<0xBE, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), @@ -129,7 +129,7 @@ let mayLoad = 1 in def MOVSX32rm8_NOREX : I<0xBE, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - []>, TB, OpSize32, Sched<[WriteALULd]>; + []>, TB, OpSize32, Sched<[WriteLoad]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -143,7 +143,7 @@ def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, - TB, Sched<[WriteALULd]>; + TB, Sched<[WriteLoad]>; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sext GR16:$src))]>, TB, @@ -151,7 +151,7 @@ def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, 
$src}", [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, - TB, Sched<[WriteALULd]>; + TB, Sched<[WriteLoad]>; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sext GR32:$src))]>, @@ -159,7 +159,7 @@ def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i32 addr:$src))]>, - Sched<[WriteALULd]>, Requires<[In64BitMode]>; + Sched<[WriteLoad]>, Requires<[In64BitMode]>; // These instructions exist as a consequence of operand size prefix having // control of the destination size, but not the input size. Only support them @@ -174,10 +174,10 @@ def MOVSX32rr32: I<0x63, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), let mayLoad = 1 in { def MOVSX16rm32: I<0x63, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>, - Sched<[WriteALULd]>, OpSize16, Requires<[In64BitMode]>; + Sched<[WriteLoad]>, OpSize16, Requires<[In64BitMode]>; def MOVSX32rm32: I<0x63, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", []>, - Sched<[WriteALULd]>, OpSize32, Requires<[In64BitMode]>; + Sched<[WriteLoad]>, OpSize32, Requires<[In64BitMode]>; } // mayLoad = 1 } // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 @@ -189,14 +189,14 @@ def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), let mayLoad = 1 in def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, - TB, Sched<[WriteALULd]>; + TB, Sched<[WriteLoad]>; def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, - TB, 
Sched<[WriteALULd]>; + TB, Sched<[WriteLoad]>; } // 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index f90202e98a941..a31117e377ae2 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1301,7 +1301,7 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>, XD, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC; + Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC; } let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in { @@ -1367,7 +1367,7 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>, XS, Requires<[UseSSE2, OptForSize]>, - Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC; + Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, SIMD_EXC; } // isCodeGenOnly = 1 let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1, diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td index 823ff78b99035..e57169db7b1d7 100644 --- a/llvm/lib/Target/X86/X86InstrShiftRotate.td +++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td @@ -14,7 +14,7 @@ let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), "shl{b}\t{%cl, $dst|$dst, cl}", @@ -30,6 +30,7 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (shl GR64:$src1, CL))]>; } // Uses = [CL], SchedRW +let SchedRW = [WriteShift] in { let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. 
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", @@ -61,7 +62,8 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t$dst", []>; } // hasSideEffects = 0 -} // Constraints = "$src = $dst", SchedRW +} // SchedRW +} // Constraints = "$src = $dst" // FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern // using CL? @@ -81,7 +83,7 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t{%cl, $dst|$dst, cl}", [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteShiftLd, WriteRMW] in { def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src), @@ -118,7 +120,7 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), Requires<[In64BitMode]>; } // SchedRW -let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), "shr{b}\t{%cl, $dst|$dst, cl}", @@ -132,8 +134,9 @@ def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t{%cl, $dst|$dst, cl}", [(set GR64:$dst, (srl GR64:$src1, CL))]>; -} +} // Uses, SchedRW +let SchedRW = [WriteShift] in { def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$src2), "shr{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>; @@ -162,7 +165,8 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst", [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>; -} // Constraints = "$src = $dst", SchedRW +} // SchedRW +} // Constraints = "$src = $dst" let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in { @@ -181,7 
+185,7 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t{%cl, $dst|$dst, cl}", [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteShiftLd, WriteRMW] in { def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src), @@ -218,7 +222,7 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), Requires<[In64BitMode]>; } // SchedRW -let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteShiftCL] in { def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t{%cl, $dst|$dst, cl}", @@ -234,8 +238,9 @@ def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t{%cl, $dst|$dst, cl}", [(set GR64:$dst, (sra GR64:$src1, CL))]>; -} +} // Uses, SchedRW +let SchedRW = [WriteShift] in { def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "sar{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>; @@ -265,7 +270,8 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst", [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>; -} // Constraints = "$src = $dst", SchedRW +} // SchedRW +} // Constraints = "$src = $dst" let Uses = [CL], SchedRW = [WriteShiftCLLd, WriteRMW] in { @@ -284,7 +290,7 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), "sar{q}\t{%cl, $dst|$dst, cl}", [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteShiftLd, WriteRMW] in { def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src), @@ -326,7 +332,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), //===----------------------------------------------------------------------===// let hasSideEffects = 0 in { 
-let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), @@ -337,9 +343,9 @@ def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "rcl{l}\t{%cl, $dst|$dst, cl}", []>, OpSize32; def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "rcl{q}\t{%cl, $dst|$dst, cl}", []>; -} // Uses = [CL, EFLAGS] +} // Uses = [CL, EFLAGS], SchedRW -let Uses = [EFLAGS] in { +let Uses = [EFLAGS], SchedRW = [WriteRotate] in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t$dst", []>; def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt), @@ -356,7 +362,7 @@ def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "rcl{q}\t$dst", []>; def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -} // Uses = [EFLAGS] +} // Uses = [EFLAGS], SchedRW let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), @@ -367,9 +373,9 @@ def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t{%cl, $dst|$dst, cl}", []>, OpSize32; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "rcr{q}\t{%cl, $dst|$dst, cl}", []>; -} // Uses = [CL, EFLAGS] +} // Uses = [CL, EFLAGS], SchedRW -let Uses = [EFLAGS] in { +let Uses = [EFLAGS], SchedRW = [WriteRotate] in { def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t$dst", []>; def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt), @@ -386,12 +392,12 @@ def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "rcr{q}\t$dst", []>; def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -} // Uses = [EFLAGS] +} // Uses = [EFLAGS], SchedRW } // Constraints = "$src = 
$dst" -let SchedRW = [WriteRotateLd, WriteRMW], mayStore = 1 in { -let Uses = [EFLAGS] in { +let mayStore = 1 in { +let Uses = [EFLAGS], SchedRW = [WriteRotateLd, WriteRMW] in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t$dst", []>; def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, u8imm:$cnt), @@ -427,7 +433,7 @@ def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>, Requires<[In64BitMode]>; -} // Uses = [EFLAGS] +} // Uses = [EFLAGS], SchedRW let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCLLd, WriteRMW] in { def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), @@ -449,11 +455,11 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t{%cl, $dst|$dst, cl}", []>, Requires<[In64BitMode]>; -} // Uses = [CL, EFLAGS] -} // SchedRW +} // Uses = [CL, EFLAGS], SchedRW +} // mayStore } // hasSideEffects = 0 -let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in { +let Constraints = "$src1 = $dst" in { // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL], SchedRW = [WriteRotateCL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -468,8 +474,9 @@ def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t{%cl, $dst|$dst, cl}", [(set GR64:$dst, (rotl GR64:$src1, CL))]>; -} +} // Uses, SchedRW +let SchedRW = [WriteRotate] in { def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "rol{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>; @@ -499,7 +506,8 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; -} // Constraints = "$src = $dst", 
SchedRW +} // SchedRW +} // Constraints = "$src = $dst" let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), @@ -515,7 +523,7 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t{%cl, $dst|$dst, cl}", [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteRotateLd, WriteRMW] in { def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1), @@ -552,7 +560,7 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), Requires<[In64BitMode]>; } // SchedRW -let Constraints = "$src1 = $dst", SchedRW = [WriteRotate] in { +let Constraints = "$src1 = $dst" in { let Uses = [CL], SchedRW = [WriteRotateCL] in { def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t{%cl, $dst|$dst, cl}", @@ -568,6 +576,7 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (rotr GR64:$src1, CL))]>; } +let SchedRW = [WriteRotate] in { def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "ror{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>; @@ -597,6 +606,7 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; +} // SchedRW } // Constraints = "$src = $dst", SchedRW let Uses = [CL], SchedRW = [WriteRotateCLLd, WriteRMW] in { @@ -613,7 +623,7 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), "ror{q}\t{%cl, $dst|$dst, cl}", [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>, Requires<[In64BitMode]>; -} +} // Uses, SchedRW let SchedRW = [WriteRotateLd, WriteRMW] in { def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src), @@ -688,7 +698,7 @@ def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(set GR64:$dst, (fshr 
GR64:$src2, GR64:$src1, CL))]>, TB; -} // SchedRW +} // Uses, SchedRW let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other. def SHLD16rri8 : Ii8<0xA4, MRMDestReg, @@ -763,7 +773,7 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(store (fshr GR64:$src2, (loadi64 addr:$dst), CL), addr:$dst)]>, TB; -} // SchedRW +} // Uses, SchedRW let SchedRW = [WriteSHDmri] in { def SHLD16mri8 : Ii8<0xA4, MRMDestMem, diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 35a776941485a..01c84048c60ba 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -367,21 +367,21 @@ defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : BWWriteResPair; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; defm : X86WriteResPairUnsupported; -defm : BWWriteResPair; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; defm : X86WriteResPairUnsupported; -defm : BWWriteResPair; -defm : BWWriteResPair; -defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; @@ -715,14 +715,6 @@ def: InstRW<[BWWriteResGroup14], (instrs LFENCE, WAIT, XGETBV)>; -def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[BWWriteResGroup15], (instregex "(V?)CVTPS2PDrr", - "(V?)CVTSS2SDrr")>; - def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -784,9 +776,7 @@ def BWWriteResGroup27 : 
SchedWriteRes<[BWPort1]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSrr)>; -def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr", - "(V?)CVTDQ2PS(Y?)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr")>; def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> { let Latency = 3; @@ -858,13 +848,6 @@ def: InstRW<[BWWriteResGroup39], (instregex "(V?)CVT(T?)SD2SI64rr", "(V?)CVT(T?)SS2SI64rr", "(V?)CVT(T?)SS2SIrr")>; -def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[BWWriteResGroup40], (instrs VCVTPS2PDYrr)>; - def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> { let Latency = 4; let NumMicroOps = 2; @@ -877,12 +860,8 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDrr)>; def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIrr", "MMX_CVT(T?)PS2PIrr", - "(V?)CVTDQ2PDrr", - "(V?)CVTPD2PSrr", - "(V?)CVTSD2SSrr", "(V?)CVTSI642SDrr", "(V?)CVTSI2SDrr", "(V?)CVTSI2SSrr", @@ -929,8 +908,6 @@ def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)")>; def: InstRW<[BWWriteResGroup49], (instrs VBROADCASTSSrm, VMOVDDUPrm, MOVDDUPrm, VMOVSHDUPrm, MOVSHDUPrm, @@ -1005,9 +982,7 @@ def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup60], (instrs VCVTDQ2PDYrr, - VCVTPD2PSYrr, - VCVTPD2DQYrr, +def: InstRW<[BWWriteResGroup60], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr)>; def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> { @@ -1170,9 +1145,6 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> { let NumMicroOps = 2; let 
ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSrm, - CVTDQ2PSrm, - VCVTDQ2PSrm)>; def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>; def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> { @@ -1236,25 +1208,14 @@ def: InstRW<[BWWriteResGroup105], (instregex "(V?)CVTSS2SI(64)?rm", "VCVTTSS2SI64rm", "(V?)CVTTSS2SIrm")>; -def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup106], (instrs VCVTPS2PDYrm)>; - def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { let Latency = 9; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm, - CVTPD2DQrm, - CVTTPD2DQrm, - MMX_CVTPI2PDrm)>; -def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm", - "(V?)CVTDQ2PDrm", - "(V?)CVTSD2SSrm")>; +def: InstRW<[BWWriteResGroup107], (instrs CVTPD2DQrm, VCVTPD2DQrm, + CVTTPD2DQrm, VCVTTPD2DQrm)>; +def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVT(T?)PD2PIrm")>; def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> { let Latency = 9; @@ -1315,13 +1276,6 @@ def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> { def: InstRW<[BWWriteResGroup123], (instregex "MUL_F(32|64)m")>; def: InstRW<[BWWriteResGroup123], (instrs VPCMPGTQYrm)>; -def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup128], (instrs VCVTDQ2PDYrm)>; - def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { let Latency = 11; let NumMicroOps = 7; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index d1d385bfaf49f..bd4cbe2469693 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -360,27 +360,27 @@ defm : HWWriteResPair; defm : 
HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 - -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 + +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : X86WriteRes; defm : X86WriteRes; @@ -864,6 +864,7 @@ def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> { def: InstRW<[HWWriteResGroup0], (instrs VBROADCASTSSrm)>; def: InstRW<[HWWriteResGroup0], (instregex "(V?)MOVSHDUPrm", "(V?)MOVSLDUPrm", + "(V?)MOVDDUPrm", "VPBROADCAST(D|Q)rm")>; def HWWriteResGroup0_1 : SchedWriteRes<[HWPort23]> { @@ -881,15 +882,6 @@ def: InstRW<[HWWriteResGroup0_1], (instrs VBROADCASTF128, def: InstRW<[HWWriteResGroup0_1], (instregex "LD_F(32|64|80)m", "VPBROADCAST(D|Q)Yrm")>; -def HWWriteResGroup0_2 : SchedWriteRes<[HWPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[HWWriteResGroup0_2], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)", - "(V?)MOVDDUPrm")>; - def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> { let Latency = 1; let NumMicroOps = 
2; @@ -1131,14 +1123,6 @@ def: InstRW<[HWWriteResGroup30], (instrs LFENCE, WAIT, XGETBV)>; -def HWWriteResGroup31 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup31], (instregex "(V?)CVTPS2PDrr", - "(V?)CVTSS2SDrr")>; - def HWWriteResGroup32 : SchedWriteRes<[HWPort6,HWPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -1241,9 +1225,7 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSrr)>; -def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr", - "(V?)CVTDQ2PS(Y?)rr")>; +def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr")>; def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> { let Latency = 3; @@ -1267,8 +1249,7 @@ def HWWriteResGroup52_1 : SchedWriteRes<[HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "ILD_F(16|32|64)m")>; -def: InstRW<[HWWriteResGroup52_1], (instrs VCVTDQ2PSYrm, - VCVTPS2DQYrm, +def: InstRW<[HWWriteResGroup52_1], (instrs VCVTPS2DQYrm, VCVTTPS2DQYrm)>; def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> { @@ -1369,13 +1350,6 @@ def HWWriteResGroup70 : SchedWriteRes<[HWPort0,HWPort1]> { def: InstRW<[HWWriteResGroup70], (instregex "(V?)CVT(T?)SD2SI(64)?rr", "(V?)CVT(T?)SS2SI(64)?rr")>; -def HWWriteResGroup71 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup71], (instrs VCVTPS2PDYrr)>; - def HWWriteResGroup72 : SchedWriteRes<[HWPort0,HWPort0156]> { let Latency = 4; let NumMicroOps = 2; @@ -1388,15 +1362,11 @@ def HWWriteResGroup73 : SchedWriteRes<[HWPort1,HWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPI2PDrr, - MMX_CVTPD2PIrr, +def: InstRW<[HWWriteResGroup73], (instrs MMX_CVTPD2PIrr, MMX_CVTPS2PIrr, MMX_CVTTPD2PIrr, 
MMX_CVTTPS2PIrr)>; -def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTDQ2PDrr", - "(V?)CVTPD2PSrr", - "(V?)CVTSD2SSrr", - "(V?)CVTSI(64)?2SDrr", +def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTSI(64)?2SDrr", "(V?)CVTSI2SSrr", "(V?)CVT(T?)PD2DQrr")>; @@ -1418,34 +1388,22 @@ def: InstRW<[HWWriteResGroup76], (instregex "(V?)CVTSD2SI(64)?rm", "VCVTTSS2SI64rm", "(V?)CVTTSS2SIrm")>; -def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup77], (instrs VCVTPS2PDYrm)>; - def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { let Latency = 10; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup78], (instrs CVTPD2PSrm, - CVTPD2DQrm, - CVTTPD2DQrm, +def: InstRW<[HWWriteResGroup78], (instrs CVTPD2DQrm, VCVTPD2DQrm, + CVTTPD2DQrm, VCVTTPD2DQrm, MMX_CVTPD2PIrm, - MMX_CVTTPD2PIrm, - CVTDQ2PDrm, - VCVTDQ2PDrm)>; + MMX_CVTTPD2PIrm)>; def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { let Latency = 9; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm, - CVTSD2SSrm, CVTSD2SSrm_Int, - VCVTSD2SSrm, VCVTSD2SSrm_Int)>; +def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>; def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { let Latency = 9; @@ -1552,9 +1510,7 @@ def HWWriteResGroup102 : SchedWriteRes<[HWPort1,HWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup102], (instrs VCVTDQ2PDYrr, - VCVTPD2PSYrr, - VCVTPD2DQYrr, +def: InstRW<[HWWriteResGroup102], (instrs VCVTPD2DQYrr, VCVTTPD2DQYrr)>; def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> { @@ -1564,13 +1520,6 @@ def HWWriteResGroup103 : SchedWriteRes<[HWPort1,HWPort23]> { } def: InstRW<[HWWriteResGroup103], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>; -def HWWriteResGroup104 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> 
{ - let Latency = 12; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup104], (instrs VCVTDQ2PDYrm)>; - def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> { let Latency = 6; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 7a1e6c06c8857..331fafa6d2fe3 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -473,10 +473,10 @@ defm : ICXWriteResPair; defm : ICXWriteResPair; defm : ICXWriteResPair; defm : ICXWriteResPair; -defm : ICXWriteResPair; -defm : ICXWriteResPair; -defm : ICXWriteResPair; -defm : ICXWriteResPair; +defm : ICXWriteResPair; +defm : ICXWriteResPair; +defm : ICXWriteResPair; +defm : ICXWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -1066,15 +1066,6 @@ def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> { } def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[ICXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)", - "(V?)MOVDDUPrm")>; // TODO: Should this be ICXWriteResGroup71? 
- def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> { let Latency = 5; let NumMicroOps = 2; @@ -1085,15 +1076,12 @@ def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr", "VCVTDQ2PDZ128rr", "VCVTPD2DQZ128rr", "(V?)CVT(T?)PD2DQrr", - "VCVTPD2PSZ128rr", - "(V?)CVTPD2PSrr", "VCVTPD2UDQZ128rr", "VCVTPS2PDZ128rr", "(V?)CVTPS2PDrr", "VCVTPS2QQZ128rr", "VCVTPS2UQQZ128rr", "VCVTQQ2PSZ128rr", - "(V?)CVTSD2SS(Z?)rr", "(V?)CVTSI(64)?2SDrr", "VCVTSI2SSZrr", "(V?)CVTSI2SSrr", @@ -1179,8 +1167,10 @@ def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm, VPBROADCASTQrm, VMOVSHDUPrm, VMOVSLDUPrm, + VMOVDDUPrm, MOVSHDUPrm, - MOVSLDUPrm)>; + MOVSLDUPrm, + MOVDDUPrm)>; def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> { let Latency = 6; @@ -1336,7 +1326,6 @@ def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort015]> { } def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", "VCVTPD2DQ(Y|Z256)rr", - "VCVTPD2PS(Y|Z256)rr", "VCVTPD2UDQZ256rr", "VCVTPS2PD(Y|Z256)rr", "VCVTPS2QQZ256rr", @@ -1356,7 +1345,6 @@ def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5,ICXPort05]> { } def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr, VCVTPD2DQZrr, - VCVTPD2PSZrr, VCVTPD2UDQZrr, VCVTPS2PDZrr, VCVTPS2QQZrr, @@ -1870,13 +1858,6 @@ def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)", "VPEXPANDDZ128rm(b?)", "VPEXPANDQZ128rm(b?)")>; -def ICXWriteResGroup153 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[ICXWriteResGroup153], (instregex "(V?)CVTSD2SSrm")>; - def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> { let Latency = 10; let NumMicroOps = 4; @@ -1933,13 +1914,6 @@ def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m", "VPEXPANDD(Z|Z256)rm(b?)", "VPEXPANDQ(Z|Z256)rm(b?)")>; -def ICXWriteResGroup163 : SchedWriteRes<[ICXPort23,ICXPort015]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; 
-} -def: InstRW<[ICXWriteResGroup163], (instregex "VCVTSD2SSZrm")>; - def ICXWriteResGroup164 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> { let Latency = 11; let NumMicroOps = 3; @@ -1952,8 +1926,7 @@ def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2PSrm, - CVTPD2DQrm, +def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2DQrm, CVTTPD2DQrm, MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm)>; @@ -2068,7 +2041,6 @@ def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> { let ResourceCycles = [1,1,1]; } def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)", - "VCVTPD2PSZrm(b?)", "VCVTPD2UDQZrm(b?)", "VCVTQQ2PSZrm(b?)", "VCVTTPD2DQZrm(b?)", diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 0f90036eb38e3..9d7069a277eb6 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -736,14 +736,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> { } def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>; -def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup31], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)")>; - def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> { let Latency = 5; let NumMicroOps = 8; @@ -1002,13 +994,6 @@ def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { } def: InstRW<[SBWriteResGroup87], (instrs FARCALL64m)>; -def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)(SD|SS)2SI(64)?rm")>; - def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git 
a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index ba245bb6358b5..e92a5a87c7da6 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -456,17 +456,17 @@ defm : SKLWriteResPair; defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; -defm : SKLWriteResPair; -defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; defm : SKLWriteResPair; defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; -defm : SKLWriteResPair; -defm : SKLWriteResPair; -defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : X86WriteRes; @@ -928,23 +928,6 @@ def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> { } def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)")>; - -def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup59], (instrs MMX_CVTPI2PDrr, - CVTDQ2PDrr, - VCVTDQ2PDrr)>; - def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 5; let NumMicroOps = 2; @@ -953,9 +936,7 @@ def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> { def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVT(T?)PD2PIrr", "MMX_CVT(T?)PS2PIrr", "(V?)CVT(T?)PD2DQrr", - "(V?)CVTPD2PSrr", "(V?)CVTPS2PDrr", - "(V?)CVTSD2SSrr", "(V?)CVTSI642SDrr", "(V?)CVTSI2SDrr", "(V?)CVTSI2SSrr", @@ -1032,7 +1013,7 @@ def SKLWriteResGroup70 : SchedWriteRes<[SKLPort0,SKLPort01]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: 
InstRW<[SKLWriteResGroup70], (instregex "(V?)CVTSS2SI(64)?rr", +def: InstRW<[SKLWriteResGroup70], (instregex "(V?)CVT(T?)SS2SIrr", "(V?)CVT(T?)SD2SI(64)?rr")>; def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> { @@ -1113,13 +1094,6 @@ def: InstRW<[SKLWriteResGroup85], (instrs VBROADCASTF128, VPBROADCASTDYrm, VPBROADCASTQYrm)>; -def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup86], (instrs VCVTDQ2PDYrr)>; - def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 6; let NumMicroOps = 2; @@ -1137,8 +1111,7 @@ def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort01]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup89], (instrs VCVTPD2PSYrr, - VCVTPS2PDYrr, +def: InstRW<[SKLWriteResGroup89], (instrs VCVTPS2PDYrr, VCVTPD2DQYrr, VCVTTPD2DQYrr)>; @@ -1176,7 +1149,7 @@ def SKLWriteResGroup95 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort01]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup95], (instregex "(V?)CVTTSS2SI(64)?rr")>; +def: InstRW<[SKLWriteResGroup95], (instregex "(V?)CVT(T?)SS2SI64?rr")>; def SKLWriteResGroup96 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05]> { let Latency = 7; @@ -1362,13 +1335,6 @@ def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup138], (instrs MMX_CVTPI2PDrm)>; -def SKLWriteResGroup139 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup139], (instregex "(V?)CVTSD2SSrm")>; - def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { let Latency = 10; let NumMicroOps = 4; @@ -1408,13 +1374,6 @@ def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup149], (instregex "FICOM(P?)(16|32)m")>; -def SKLWriteResGroup150 : 
SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup150], (instregex "(V?)CVTDQ2PDrm")>; - def SKLWriteResGroup151 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort01]> { let Latency = 11; let NumMicroOps = 3; @@ -1430,8 +1389,7 @@ def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort01]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2PSrm, - CVTPD2DQrm, +def: InstRW<[SKLWriteResGroup152], (instrs CVTPD2DQrm, CVTTPD2DQrm, MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm)>; @@ -1472,13 +1430,6 @@ def SKLWriteResGroup162 : SchedWriteRes<[SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup162], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>; -def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { - let Latency = 13; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup163], (instrs VCVTDQ2PDYrm)>; - def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 14; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 68820cb8bbf21..5ee909b49d098 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -465,10 +465,10 @@ defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; defm : X86WriteRes; defm : X86WriteRes; @@ -1048,14 +1048,6 @@ def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let 
ResourceCycles = [1]; -} -def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)", - "MOVZX(16|32|64)rm(8|16)")>; - def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> { let Latency = 5; let NumMicroOps = 2; @@ -1066,15 +1058,12 @@ def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr", "VCVTDQ2PDZ128rr", "VCVTPD2DQZ128rr", "(V?)CVT(T?)PD2DQrr", - "VCVTPD2PSZ128rr", - "(V?)CVTPD2PSrr", "VCVTPD2UDQZ128rr", "VCVTPS2PDZ128rr", "(V?)CVTPS2PDrr", "VCVTPS2QQZ128rr", "VCVTPS2UQQZ128rr", "VCVTQQ2PSZ128rr", - "(V?)CVTSD2SS(Z?)rr", "(V?)CVTSI(64)?2SDrr", "VCVTSI2SSZrr", "(V?)CVTSI2SSrr", @@ -1322,7 +1311,6 @@ def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> { } def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", "VCVTPD2DQ(Y|Z256)rr", - "VCVTPD2PS(Y|Z256)rr", "VCVTPD2UDQZ256rr", "VCVTPS2PD(Y|Z256)rr", "VCVTPS2QQZ256rr", @@ -1342,7 +1330,6 @@ def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> { } def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr, VCVTPD2DQZrr, - VCVTPD2PSZrr, VCVTPD2UDQZrr, VCVTPS2PDZrr, VCVTPS2QQZrr, @@ -1851,13 +1838,6 @@ def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)", "VPEXPANDDZ128rm(b?)", "VPEXPANDQZ128rm(b?)")>; -def SKXWriteResGroup153 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup153], (instregex "(V?)CVTSD2SSrm")>; - def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { let Latency = 10; let NumMicroOps = 4; @@ -1914,13 +1894,6 @@ def: InstRW<[SKXWriteResGroup162], (instregex "FICOM(P?)(16|32)m", "VPEXPANDD(Z|Z256)rm(b?)", "VPEXPANDQ(Z|Z256)rm(b?)")>; -def SKXWriteResGroup163 : SchedWriteRes<[SKXPort23,SKXPort015]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SKXWriteResGroup163], (instregex "VCVTSD2SSZrm")>; - def SKXWriteResGroup164 : 
SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 11; let NumMicroOps = 3; @@ -1933,8 +1906,7 @@ def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2PSrm, - CVTPD2DQrm, +def: InstRW<[SKXWriteResGroup166], (instrs CVTPD2DQrm, CVTTPD2DQrm, MMX_CVTPD2PIrm, MMX_CVTTPD2PIrm)>; @@ -2049,7 +2021,6 @@ def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { let ResourceCycles = [1,1,1]; } def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)", - "VCVTPD2PSZrm(b?)", "VCVTPD2UDQZrm(b?)", "VCVTQQ2PSZrm(b?)", "VCVTTPD2DQZrm(b?)", diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index b156396660805..3321ed737a444 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -681,6 +681,9 @@ def SchedWritePSADBW def SchedWriteVecExtend : X86SchedWriteWidths; +def SchedWriteVecTruncate + : X86SchedWriteWidths; def SchedWriteShuffle : X86SchedWriteWidths; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 78b32953cdf80..705100d85f361 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -177,7 +177,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 4; } // Model the effect of clobbering the read-write mask operand of the GATHER operation. // Does not cost anything by itself, only has latency, matching that of the WriteLoad, @@ -523,10 +523,6 @@ def : SchedAlias; // r16,m. def : InstRW<[WriteALULd, ReadAfterLd], (instrs MOV16rm)>; -// MOVSX, MOVZX. -// r,m. -def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; - // XCHG. // r,m. 
def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> { diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index d6b0d2dd191a1..87a953cef33a7 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -176,7 +176,7 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes { let Latency = 8; } +def : WriteRes { let Latency = 4; } // Model the effect of clobbering the read-write mask operand of the GATHER operation. // Does not cost anything by itself, only has latency, matching that of the WriteLoad, @@ -522,10 +522,6 @@ def : SchedAlias; // r16,m. def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>; -// MOVSX, MOVZX. -// r,m. -def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; - // XCHG. // r,r. def Zn2WriteXCHG : SchedWriteRes<[Zn2ALU]> { diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 52d0defae63c9..4ce823f526637 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -729,15 +729,15 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1)) return false; - // Alias Analysis to check for store b/w the loads. + // TODO: Alias Analysis to check for stores b/w the loads. + // Currently bail out if there are stores b/w the loads. 
LoadInst *Start = LI1, *End = LI2; if (!LI1->comesBefore(LI2)) std::swap(Start, End); - MemoryLocation Loc = MemoryLocation::get(End); unsigned NumScanned = 0; for (Instruction &Inst : make_range(Start->getIterator(), End->getIterator())) { - if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc))) + if (Inst.mayWriteToMemory()) return false; if (++NumScanned > MaxInstrsToScan) return false; diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 2935127dcbc48..e8d7fe2212251 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -457,7 +457,7 @@ static bool getPotentialCopiesOfMemoryValue( auto &PI = A.getAAFor(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess, - HasBeenWrittenTo, OAS)) { + HasBeenWrittenTo, &OAS)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 0b446a4f74078..0514e503fc6b4 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -757,6 +757,13 @@ struct AccessAsInstructionInfo : DenseMapInfo { /// A type to track pointer/struct usage and accesses for AAPointerInfo. struct AA::PointerInfo::State : public AbstractState { + + ~State() { + // We do not delete the Accesses objects but need to destroy them still. + for (auto &It : AccessBins) + It.second->~Accesses(); + } + /// Return the best possible representable state. 
static State getBestState(const State &SIS) { return State(); } @@ -768,7 +775,9 @@ struct AA::PointerInfo::State : public AbstractState { } State() = default; - State(State &&SIS) = default; + State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) { + SIS.AccessBins.clear(); + } const State &getAssumed() const { return *this; } @@ -794,9 +803,7 @@ struct AA::PointerInfo::State : public AbstractState { if (this == &R) return *this; BS = R.BS; - AccessList = R.AccessList; - OffsetBins = R.OffsetBins; - RemoteIMap = R.RemoteIMap; + AccessBins = R.AccessBins; return *this; } @@ -804,52 +811,99 @@ struct AA::PointerInfo::State : public AbstractState { if (this == &R) return *this; std::swap(BS, R.BS); - std::swap(AccessList, R.AccessList); - std::swap(OffsetBins, R.OffsetBins); - std::swap(RemoteIMap, R.RemoteIMap); + std::swap(AccessBins, R.AccessBins); return *this; } - /// Add a new Access to the state at offset \p Offset and with size \p Size. + bool operator==(const State &R) const { + if (BS != R.BS) + return false; + if (AccessBins.size() != R.AccessBins.size()) + return false; + auto It = begin(), RIt = R.begin(), E = end(); + while (It != E) { + if (It->getFirst() != RIt->getFirst()) + return false; + auto &Accs = It->getSecond(); + auto &RAccs = RIt->getSecond(); + if (Accs->size() != RAccs->size()) + return false; + for (const auto &ZipIt : llvm::zip(*Accs, *RAccs)) + if (std::get<0>(ZipIt) != std::get<1>(ZipIt)) + return false; + ++It; + ++RIt; + } + return true; + } + bool operator!=(const State &R) const { return !(*this == R); } + + /// We store accesses in a set with the instruction as key. 
+ struct Accesses { + SmallVector Accesses; + DenseMap Map; + + unsigned size() const { return Accesses.size(); } + + using vec_iterator = decltype(Accesses)::iterator; + vec_iterator begin() { return Accesses.begin(); } + vec_iterator end() { return Accesses.end(); } + + using iterator = decltype(Map)::const_iterator; + iterator find(AAPointerInfo::Access &Acc) { + return Map.find(Acc.getRemoteInst()); + } + iterator find_end() { return Map.end(); } + + AAPointerInfo::Access &get(iterator &It) { + return Accesses[It->getSecond()]; + } + + void insert(AAPointerInfo::Access &Acc) { + Map[Acc.getRemoteInst()] = Accesses.size(); + Accesses.push_back(Acc); + } + }; + + /// We store all accesses in bins denoted by their offset and size. + using AccessBinsTy = DenseMap; + + AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); } + AccessBinsTy::const_iterator end() const { return AccessBins.end(); } + +protected: + /// The bins with all the accesses for the associated pointer. + AccessBinsTy AccessBins; + + /// Add a new access to the state at offset \p Offset and with size \p Size. /// The access is associated with \p I, writes \p Content (if anything), and - /// is of kind \p Kind. If an Access already exists for the same \p I and same - /// \p RemoteI, the two are combined, potentially losing information about - /// offset and size. The resulting access must now be moved from its original - /// OffsetBin to the bin for its new offset. - /// + /// is of kind \p Kind. /// \Returns CHANGED, if the state changed, UNCHANGED otherwise. 
ChangeStatus addAccess(Attributor &A, int64_t Offset, int64_t Size, Instruction &I, Optional Content, AAPointerInfo::AccessKind Kind, Type *Ty, - Instruction *RemoteI = nullptr); - - using OffsetBinsTy = DenseMap>; - - using const_bin_iterator = OffsetBinsTy::const_iterator; - const_bin_iterator begin() const { return OffsetBins.begin(); } - const_bin_iterator end() const { return OffsetBins.end(); } - - const AAPointerInfo::Access &getAccess(unsigned Index) const { - return AccessList[Index]; + Instruction *RemoteI = nullptr, + Accesses *BinPtr = nullptr) { + AA::OffsetAndSize Key{Offset, Size}; + Accesses *&Bin = BinPtr ? BinPtr : AccessBins[Key]; + if (!Bin) + Bin = new (A.Allocator) Accesses; + AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty); + // Check if we have an access for this instruction in this bin, if not, + // simply add it. + auto It = Bin->find(Acc); + if (It == Bin->find_end()) { + Bin->insert(Acc); + return ChangeStatus::CHANGED; + } + // If the existing access is the same as then new one, nothing changed. + AAPointerInfo::Access &Current = Bin->get(It); + AAPointerInfo::Access Before = Current; + // The new one will be combined with the existing one. + Current &= Acc; + return Current == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } -protected: - // Every memory instruction results in an Access object. We maintain a list of - // all Access objects that we own, along with the following maps: - // - // - OffsetBins: OffsetAndSize -> { Access } - // - RemoteIMap: RemoteI x LocalI -> Access - // - // A RemoteI is any instruction that accesses memory. RemoteI is different - // from LocalI if and only if LocalI is a call; then RemoteI is some - // instruction in the callgraph starting from LocalI. Multiple paths in the - // callgraph from LocalI to RemoteI may produce multiple accesses, but these - // are all combined into a single Access object. 
This may result in loss of - // information in OffsetAndSize in the Access object. - SmallVector AccessList; - OffsetBinsTy OffsetBins; - DenseMap> RemoteIMap; - /// See AAPointerInfo::forallInterferingAccesses. bool forallInterferingAccesses( AA::OffsetAndSize OAS, @@ -857,16 +911,14 @@ struct AA::PointerInfo::State : public AbstractState { if (!isValidState()) return false; - for (const auto &It : OffsetBins) { + for (const auto &It : AccessBins) { AA::OffsetAndSize ItOAS = It.getFirst(); if (!OAS.mayOverlap(ItOAS)) continue; bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown(); - for (auto Index : It.getSecond()) { - auto &Access = AccessList[Index]; + for (auto &Access : *It.getSecond()) if (!CB(Access, IsExact)) return false; - } } return true; } @@ -875,19 +927,32 @@ struct AA::PointerInfo::State : public AbstractState { bool forallInterferingAccesses( Instruction &I, function_ref CB, - AA::OffsetAndSize &OAS) const { + AA::OffsetAndSize *OASPtr) const { if (!isValidState()) return false; - auto LocalList = RemoteIMap.find(&I); - if (LocalList == RemoteIMap.end()) { - return true; + // First find the offset and size of I. + AA::OffsetAndSize OAS; + for (const auto &It : AccessBins) { + for (auto &Access : *It.getSecond()) { + if (Access.getRemoteInst() == &I) { + OAS = It.getFirst(); + break; + } + } + if (OAS.Size != AA::OffsetAndSize::Unassigned) + break; } - for (auto LI : LocalList->getSecond()) { - auto &Access = AccessList[LI]; - OAS &= {Access.getOffset(), Access.getSize()}; - } + if (OASPtr) + *OASPtr = OAS; + + // No access for I was found, we are done. + if (OAS.Size == AA::OffsetAndSize::Unassigned) + return true; + + // Now that we have an offset and size, find all overlapping ones and use + // the callback on the accesses. 
return forallInterferingAccesses(OAS, CB); } @@ -896,56 +961,6 @@ struct AA::PointerInfo::State : public AbstractState { BooleanState BS; }; -ChangeStatus AA::PointerInfo::State::addAccess(Attributor &A, int64_t Offset, - int64_t Size, Instruction &I, - Optional Content, - AAPointerInfo::AccessKind Kind, - Type *Ty, Instruction *RemoteI) { - RemoteI = RemoteI ? RemoteI : &I; - AAPointerInfo::Access Acc(&I, RemoteI, Offset, Size, Content, Kind, Ty); - - // Check if we have an access for this instruction, if not, simply add it. - auto &LocalList = RemoteIMap[RemoteI]; - bool AccExists = false; - unsigned AccIndex = AccessList.size(); - for (auto Index : LocalList) { - auto &A = AccessList[Index]; - if (A.getLocalInst() == &I) { - AccExists = true; - AccIndex = Index; - break; - } - } - if (!AccExists) { - AccessList.push_back(Acc); - LocalList.push_back(AccIndex); - } else { - // The new one will be combined with the existing one. - auto &Current = AccessList[AccIndex]; - auto Before = Current; - Current &= Acc; - if (Current == Before) - return ChangeStatus::UNCHANGED; - - Acc = Current; - AA::OffsetAndSize Key{Before.getOffset(), Before.getSize()}; - assert(OffsetBins.count(Key) && "Existing Access must be in some bin."); - auto &Bin = OffsetBins[Key]; - assert(Bin.count(AccIndex) && - "Expected bin to actually contain the Access."); - LLVM_DEBUG(dbgs() << "[AAPointerInfo] Removing Access " - << AccessList[AccIndex] << " with key {" << Key.Offset - << ',' << Key.Size << "}\n"); - Bin.erase(AccIndex); - } - - AA::OffsetAndSize Key{Acc.getOffset(), Acc.getSize()}; - LLVM_DEBUG(dbgs() << "[AAPointerInfo] Inserting Access " << Acc - << " with key {" << Key.Offset << ',' << Key.Size << "}\n"); - OffsetBins[Key].insert(AccIndex); - return ChangeStatus::CHANGED; -} - namespace { struct AAPointerInfoImpl : public StateWrapper { @@ -956,7 +971,7 @@ struct AAPointerInfoImpl const std::string getAsStr() const override { return std::string("PointerInfo ") + (isValidState() ? 
(std::string("#") + - std::to_string(OffsetBins.size()) + " bins") + std::to_string(AccessBins.size()) + " bins") : ""); } @@ -975,7 +990,7 @@ struct AAPointerInfoImpl bool forallInterferingAccesses( Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref UserCB, bool &HasBeenWrittenTo, - AA::OffsetAndSize &OAS) const override { + AA::OffsetAndSize *OASPtr = nullptr) const override { HasBeenWrittenTo = false; SmallPtrSet DominatingWrites; @@ -1090,7 +1105,7 @@ struct AAPointerInfoImpl InterferingAccesses.push_back({&Acc, Exact}); return true; }; - if (!State::forallInterferingAccesses(I, AccessCB, OAS)) + if (!State::forallInterferingAccesses(I, AccessCB, OASPtr)) return false; if (HasBeenWrittenTo) { @@ -1157,15 +1172,14 @@ struct AAPointerInfoImpl // Combine the accesses bin by bin. ChangeStatus Changed = ChangeStatus::UNCHANGED; - const auto &State = OtherAAImpl.getState(); - for (const auto &It : State) { + for (const auto &It : OtherAAImpl.getState()) { AA::OffsetAndSize OAS = AA::OffsetAndSize::getUnknown(); if (Offset != AA::OffsetAndSize::Unknown && !It.first.offsetOrSizeAreUnknown()) { OAS = AA::OffsetAndSize(It.first.Offset + Offset, It.first.Size); } - for (auto Index : It.getSecond()) { - const auto &RAcc = State.getAccess(Index); + Accesses *Bin = AccessBins.lookup(OAS); + for (const AAPointerInfo::Access &RAcc : *It.second) { if (IsByval && !RAcc.isRead()) continue; bool UsedAssumedInformation = false; @@ -1178,8 +1192,9 @@ struct AAPointerInfoImpl AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW)); AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST)); } - Changed = Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK, - RAcc.getType(), RAcc.getRemoteInst()); + Changed = + Changed | addAccess(A, OAS.Offset, OAS.Size, CB, Content, AK, + RAcc.getType(), RAcc.getRemoteInst(), Bin); } } return Changed; @@ -1191,11 +1206,10 @@ struct AAPointerInfoImpl /// Dump the state into \p O. 
void dumpState(raw_ostream &O) { - for (auto &It : OffsetBins) { + for (auto &It : AccessBins) { O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size - << "] : " << It.getSecond().size() << "\n"; - for (auto AccIndex : It.getSecond()) { - auto &Acc = AccessList[AccIndex]; + << "] : " << It.getSecond()->size() << "\n"; + for (auto &Acc : *It.getSecond()) { O << " - " << Acc.getKind() << " - " << *Acc.getLocalInst() << "\n"; if (Acc.getLocalInst() != Acc.getRemoteInst()) O << " --> " << *Acc.getRemoteInst() @@ -1516,15 +1530,13 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { AAPointerInfoCallSiteArgument(const IRPosition &IRP, Attributor &A) : AAPointerInfoFloating(IRP, A) {} - /// See AbstractAttribute::initialize(...). - void initialize(Attributor &A) override { - AAPointerInfoFloating::initialize(A); - + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + using namespace AA::PointerInfo; // We handle memory intrinsics explicitly, at least the first (= // destination) and second (=source) arguments as we know how they are // accessed. if (auto *MI = dyn_cast_or_null(getCtxI())) { - // TODO: Simplify the length. ConstantInt *Length = dyn_cast(MI->getLength()); int64_t LengthVal = AA::OffsetAndSize::Unknown; if (Length) @@ -1541,22 +1553,16 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating { } else { LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic " << *MI << "\n"); - indicatePessimisticFixpoint(); + return indicatePessimisticFixpoint(); } - indicateOptimisticFixpoint(); - LLVM_DEBUG({ - dbgs() << "Accesses by bin after initialization:\n"; + dbgs() << "Accesses by bin after update:\n"; dumpState(dbgs()); }); - return; - } - } - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override { - using namespace AA::PointerInfo; + return Changed; + } // TODO: Once we have call site specific value information we can provide // call site specific liveness information and then it makes @@ -7299,13 +7305,28 @@ struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { + // TODO: It would be better to merge this with AAMemoryLocation, so that + // we could determine read/write per location. This would also have the + // benefit of only one place trying to manifest the memory attribute. Function &F = cast(getAnchorValue()); - if (isAssumedReadNone()) { - F.removeFnAttr(Attribute::ArgMemOnly); - F.removeFnAttr(Attribute::InaccessibleMemOnly); - F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly); - } - return AAMemoryBehaviorImpl::manifest(A); + MemoryEffects ME = MemoryEffects::unknown(); + if (isAssumedReadNone()) + ME = MemoryEffects::none(); + else if (isAssumedReadOnly()) + ME = MemoryEffects::readOnly(); + else if (isAssumedWriteOnly()) + ME = MemoryEffects::writeOnly(); + + // Intersect with existing memory attribute, as we currently deduce the + // location and modref portion separately. + MemoryEffects ExistingME = F.getMemoryEffects(); + ME &= ExistingME; + if (ME == ExistingME) + return ChangeStatus::UNCHANGED; + + return IRAttributeManifest::manifestAttrs( + A, getIRPosition(), Attribute::getWithMemoryEffects(F.getContext(), ME), + /*ForceReplace*/ true); } /// See AbstractAttribute::trackStatistics() @@ -7345,6 +7366,31 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { return clampStateAndIndicateChange(getState(), FnAA.getState()); } + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + // TODO: Deduplicate this with AAMemoryBehaviorFunction. 
+ CallBase &CB = cast(getAnchorValue()); + MemoryEffects ME = MemoryEffects::unknown(); + if (isAssumedReadNone()) + ME = MemoryEffects::none(); + else if (isAssumedReadOnly()) + ME = MemoryEffects::readOnly(); + else if (isAssumedWriteOnly()) + ME = MemoryEffects::writeOnly(); + + // Intersect with existing memory attribute, as we currently deduce the + // location and modref portion separately. + MemoryEffects ExistingME = CB.getMemoryEffects(); + ME &= ExistingME; + if (ME == ExistingME) + return ChangeStatus::UNCHANGED; + + return IRAttributeManifest::manifestAttrs( + A, getIRPosition(), + Attribute::getWithMemoryEffects(CB.getContext(), ME), + /*ForceReplace*/ true); + } + /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { if (isAssumedReadNone()) @@ -7614,36 +7660,54 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { // unlikely this will cause real performance problems. If we are deriving // attributes for the anchor function we even remove the attribute in // addition to ignoring it. + // TODO: A better way to handle this would be to add ~NO_GLOBAL_MEM / + // MemoryEffects::Other as a possible location. bool UseArgMemOnly = true; Function *AnchorFn = IRP.getAnchorScope(); if (AnchorFn && A.isRunOn(*AnchorFn)) UseArgMemOnly = !AnchorFn->hasLocalLinkage(); SmallVector Attrs; - IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions); + IRP.getAttrs({Attribute::Memory}, Attrs, IgnoreSubsumingPositions); for (const Attribute &Attr : Attrs) { - switch (Attr.getKindAsEnum()) { - case Attribute::ReadNone: + // TODO: We can map MemoryEffects to Attributor locations more precisely. 
+ MemoryEffects ME = Attr.getMemoryEffects(); + if (ME.doesNotAccessMemory()) { State.addKnownBits(NO_LOCAL_MEM | NO_CONST_MEM); - break; - case Attribute::InaccessibleMemOnly: + continue; + } + if (ME.onlyAccessesInaccessibleMem()) { State.addKnownBits(inverseLocation(NO_INACCESSIBLE_MEM, true, true)); - break; - case Attribute::ArgMemOnly: + continue; + } + if (ME.onlyAccessesArgPointees()) { if (UseArgMemOnly) State.addKnownBits(inverseLocation(NO_ARGUMENT_MEM, true, true)); - else - IRP.removeAttrs({Attribute::ArgMemOnly}); - break; - case Attribute::InaccessibleMemOrArgMemOnly: + else { + // Remove location information, only keep read/write info. + ME = MemoryEffects(ME.getModRef()); + IRAttributeManifest::manifestAttrs( + A, IRP, + Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(), + ME), + /*ForceReplace*/ true); + } + continue; + } + if (ME.onlyAccessesInaccessibleOrArgMem()) { if (UseArgMemOnly) State.addKnownBits(inverseLocation( NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true)); - else - IRP.removeAttrs({Attribute::InaccessibleMemOrArgMemOnly}); - break; - default: - llvm_unreachable("Unexpected attribute!"); + else { + // Remove location information, only keep read/write info. + ME = MemoryEffects(ME.getModRef()); + IRAttributeManifest::manifestAttrs( + A, IRP, + Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(), + ME), + /*ForceReplace*/ true); + } + continue; } } } @@ -7651,41 +7715,53 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { /// See AbstractAttribute::getDeducedAttributes(...). void getDeducedAttributes(LLVMContext &Ctx, SmallVectorImpl &Attrs) const override { + // TODO: We can map Attributor locations to MemoryEffects more precisely. 
assert(Attrs.size() == 0); - if (isAssumedReadNone()) { - Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone)); - } else if (getIRPosition().getPositionKind() == IRPosition::IRP_FUNCTION) { - if (isAssumedInaccessibleMemOnly()) - Attrs.push_back(Attribute::get(Ctx, Attribute::InaccessibleMemOnly)); + if (getIRPosition().getPositionKind() == IRPosition::IRP_FUNCTION) { + if (isAssumedReadNone()) + Attrs.push_back( + Attribute::getWithMemoryEffects(Ctx, MemoryEffects::none())); + else if (isAssumedInaccessibleMemOnly()) + Attrs.push_back(Attribute::getWithMemoryEffects( + Ctx, MemoryEffects::inaccessibleMemOnly())); else if (isAssumedArgMemOnly()) - Attrs.push_back(Attribute::get(Ctx, Attribute::ArgMemOnly)); - else if (isAssumedInaccessibleOrArgMemOnly()) Attrs.push_back( - Attribute::get(Ctx, Attribute::InaccessibleMemOrArgMemOnly)); + Attribute::getWithMemoryEffects(Ctx, MemoryEffects::argMemOnly())); + else if (isAssumedInaccessibleOrArgMemOnly()) + Attrs.push_back(Attribute::getWithMemoryEffects( + Ctx, MemoryEffects::inaccessibleOrArgMemOnly())); } assert(Attrs.size() <= 1); } /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { + // TODO: If AAMemoryLocation and AAMemoryBehavior are merged, we could + // provide per-location modref information here. const IRPosition &IRP = getIRPosition(); - // Check if we would improve the existing attributes first. - SmallVector DeducedAttrs; + SmallVector DeducedAttrs; getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs); - if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) { - return IRP.hasAttr(Attr.getKindAsEnum(), - /* IgnoreSubsumingPositions */ true); - })) + if (DeducedAttrs.size() != 1) return ChangeStatus::UNCHANGED; + MemoryEffects ME = DeducedAttrs[0].getMemoryEffects(); + + // Intersect with existing memory attribute, as we currently deduce the + // location and modref portion separately. 
+ SmallVector ExistingAttrs; + IRP.getAttrs({Attribute::Memory}, ExistingAttrs, + /* IgnoreSubsumingPositions */ true); + if (ExistingAttrs.size() == 1) { + MemoryEffects ExistingME = ExistingAttrs[0].getMemoryEffects(); + ME &= ExistingME; + if (ME == ExistingME) + return ChangeStatus::UNCHANGED; + } - // Clear existing attributes. - IRP.removeAttrs(AttrKinds); - if (isAssumedReadNone()) - IRP.removeAttrs(AAMemoryBehaviorImpl::AttrKinds); - - // Use the generic manifest method. - return IRAttribute::manifest(A); + return IRAttributeManifest::manifestAttrs( + A, IRP, + Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(), ME), + /*ForceReplace*/ true); } /// See AAMemoryLocation::checkForAllAccessesToMemoryKind(...). @@ -7808,15 +7884,8 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { /// Used to allocate access sets. BumpPtrAllocator &Allocator; - - /// The set of IR attributes AAMemoryLocation deals with. - static const Attribute::AttrKind AttrKinds[4]; }; -const Attribute::AttrKind AAMemoryLocationImpl::AttrKinds[] = { - Attribute::ReadNone, Attribute::InaccessibleMemOnly, Attribute::ArgMemOnly, - Attribute::InaccessibleMemOrArgMemOnly}; - void AAMemoryLocationImpl::categorizePtrValue( Attributor &A, const Instruction &I, const Value &Ptr, AAMemoryLocation::StateType &State, bool &Changed) { diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index a187cb1e4790e..3058dc25202ec 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -69,10 +69,7 @@ using namespace llvm; #define DEBUG_TYPE "function-attrs" -STATISTIC(NumArgMemOnly, "Number of functions marked argmemonly"); -STATISTIC(NumReadNone, "Number of functions marked readnone"); -STATISTIC(NumReadOnly, "Number of functions marked readonly"); -STATISTIC(NumWriteOnly, "Number of functions marked writeonly"); +STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute"); 
STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumReturned, "Number of arguments marked returned"); STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); @@ -254,79 +251,14 @@ static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, return; } - ModRefInfo MR = ME.getModRef(); - for (Function *F : SCCNodes) { - if (F->doesNotAccessMemory()) - // Already perfect! - continue; - - if (ME.doesNotAccessMemory()) { - // For readnone, remove all other memory attributes. - AttributeMask AttrsToRemove; - AttrsToRemove.addAttribute(Attribute::ReadOnly); - AttrsToRemove.addAttribute(Attribute::WriteOnly); - AttrsToRemove.addAttribute(Attribute::ArgMemOnly); - AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly); - AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); - - ++NumReadNone; - F->removeFnAttrs(AttrsToRemove); - F->addFnAttr(Attribute::ReadNone); - Changed.insert(F); - continue; - } - - // Add argmemonly, inaccessiblememonly, or inaccessible_or_argmemonly - // attributes if possible. 
- AttributeMask AttrsToRemove; - AttrsToRemove.addAttribute(Attribute::ArgMemOnly); - AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly); - AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); - if (ME.onlyAccessesArgPointees()) { - if (!F->onlyAccessesArgMemory()) { - NumArgMemOnly++; - F->removeFnAttrs(AttrsToRemove); - F->addFnAttr(Attribute::ArgMemOnly); - Changed.insert(F); - } - } else if (ME.onlyAccessesInaccessibleMem()) { - if (!F->onlyAccessesInaccessibleMemory()) { - F->removeFnAttrs(AttrsToRemove); - F->addFnAttr(Attribute::InaccessibleMemOnly); - Changed.insert(F); - } - } else if (ME.onlyAccessesInaccessibleOrArgMem() && - !F->onlyAccessesInaccessibleMemOrArgMem()) { - F->removeFnAttrs(AttrsToRemove); - F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + MemoryEffects OldME = F->getMemoryEffects(); + MemoryEffects NewME = ME & OldME; + if (NewME != OldME) { + ++NumMemoryAttr; + F->setMemoryEffects(NewME); Changed.insert(F); } - - // The SCC contains functions both writing and reading from memory. We - // cannot add readonly or writeonline attributes. - if (MR == ModRefInfo::ModRef) - continue; - - if (F->onlyReadsMemory() && MR == ModRefInfo::Ref) - continue; - - if (F->onlyWritesMemory() && MR == ModRefInfo::Mod) - continue; - - Changed.insert(F); - - // Add in the new attribute. 
- if (MR == ModRefInfo::Mod) { - ++NumWriteOnly; - F->removeFnAttr(Attribute::ReadOnly); - F->addFnAttr(Attribute::WriteOnly); - } else { - ++NumReadOnly; - assert(MR == ModRefInfo::Ref); - F->removeFnAttr(Attribute::WriteOnly); - F->addFnAttr(Attribute::ReadOnly); - } } } diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index b589ec798caa1..844ee19e1e6a5 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1051,6 +1051,7 @@ bool llvm::convertToDeclaration(GlobalValue &GV) { void llvm::thinLTOFinalizeInModule(Module &TheModule, const GVSummaryMapTy &DefinedGlobals, bool PropagateAttrs) { + DenseSet NonPrevailingComdats; auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) { // See if the global summary analysis computed a new resolved linkage. const auto &GS = DefinedGlobals.find(GV.getGUID()); @@ -1128,8 +1129,10 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule, // as this is a declaration for the linker, and will be dropped eventually. // It is illegal for comdats to contain declarations. auto *GO = dyn_cast_or_null(&GV); - if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) + if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { + NonPrevailingComdats.insert(GO->getComdat()); GO->setComdat(nullptr); + } }; // Process functions and global now @@ -1139,6 +1142,36 @@ void llvm::thinLTOFinalizeInModule(Module &TheModule, FinalizeInModule(GV); for (auto &GV : TheModule.aliases()) FinalizeInModule(GV); + + // For a non-prevailing comdat, all its members must be available_externally. + // FinalizeInModule has handled non-local-linkage GlobalValues. Here we handle + // local linkage GlobalValues. 
+ if (NonPrevailingComdats.empty()) + return; + for (auto &GO : TheModule.global_objects()) { + if (auto *C = GO.getComdat(); C && NonPrevailingComdats.count(C)) { + GO.setComdat(nullptr); + GO.setLinkage(GlobalValue::AvailableExternallyLinkage); + } + } + bool Changed; + do { + Changed = false; + // If an alias references a GlobalValue in a non-prevailing comdat, change + // it to available_externally. For simplicity we don't handle ConstantExpr + // aliasee, which is unlikely used in a COMDAT. + for (auto &GA : TheModule.aliases()) { + if (GA.hasAvailableExternallyLinkage()) + continue; + assert(isa(GA.getAliasee()) && + "non-GlobalValue aliasee is unimplemented"); + if (const auto *GV = dyn_cast(GA.getAliasee())) + if (GV->hasAvailableExternallyLinkage()) { + GA.setLinkage(GlobalValue::AvailableExternallyLinkage); + Changed = true; + } + } + } while (Changed); } /// Run internalization on \p TheModule based on symmary analysis. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 76234ccfdcff3..6f1b0b9b070bb 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2032,12 +2032,34 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { } const APInt *Op0C; - if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) { - // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known - // zero. - KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); - if ((*Op0C | RHSKnown.Zero).isAllOnes()) - return BinaryOperator::CreateXor(Op1, Op0); + if (match(Op0, m_APInt(Op0C))) { + if (Op0C->isMask()) { + // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known + // zero. 
+ KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); + if ((*Op0C | RHSKnown.Zero).isAllOnes()) + return BinaryOperator::CreateXor(Op1, Op0); + } + + // C - ((C3 -nuw X) & C2) --> (C - (C2 & C3)) + (X & C2) when: + // (C3 - ((C2 & C3) - 1)) is pow2 + // ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1) + // C2 is negative pow2 || sub nuw + const APInt *C2, *C3; + BinaryOperator *InnerSub; + if (match(Op1, m_OneUse(m_And(m_BinOp(InnerSub), m_APInt(C2)))) && + match(InnerSub, m_Sub(m_APInt(C3), m_Value(X))) && + (InnerSub->hasNoUnsignedWrap() || C2->isNegatedPowerOf2())) { + APInt C2AndC3 = *C2 & *C3; + APInt C2AndC3Minus1 = C2AndC3 - 1; + APInt C2AddC3 = *C2 + *C3; + if ((*C3 - C2AndC3Minus1).isPowerOf2() && + C2AndC3Minus1.isSubsetOf(C2AddC3)) { + Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(), *C2)); + return BinaryOperator::CreateAdd( + And, ConstantInt::get(I.getType(), *Op0C - C2AndC3)); + } + } } { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index e20f907dd83be..45b9fd1a027a7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1342,6 +1342,17 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) { return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC); } + // If we are truncating, masking, and then zexting back to the original type, + // that's just a mask. This is not handled by canEvaluateZextd if the + // intermediate values have extra uses. This could be generalized further for + // a non-constant mask operand. 
+ // zext (and (trunc X), C) --> and X, (zext C) + if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) && + X->getType() == DestTy) { + Constant *ZextC = ConstantExpr::getZExt(C, DestTy); + return BinaryOperator::CreateAnd(X, ZextC); + } + if (match(Src, m_VScale(DL))) { if (CI.getFunction() && CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) { diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 93fc04c6d049f..64ec387b6afa1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6772,10 +6772,48 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI, /// Optimize fabs(X) compared with zero. static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) { Value *X; - if (!match(I.getOperand(0), m_FAbs(m_Value(X))) || - !match(I.getOperand(1), m_PosZeroFP())) + if (!match(I.getOperand(0), m_FAbs(m_Value(X)))) return nullptr; + const APFloat *C; + if (!match(I.getOperand(1), m_APFloat(C))) + return nullptr; + + if (!C->isPosZero()) { + if (*C != APFloat::getSmallestNormalized(C->getSemantics())) + return nullptr; + + const Function *F = I.getFunction(); + DenormalMode Mode = F->getDenormalMode(C->getSemantics()); + if (Mode.Input == DenormalMode::PreserveSign || + Mode.Input == DenormalMode::PositiveZero) { + + auto replaceFCmp = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) { + Constant *Zero = ConstantFP::getNullValue(X->getType()); + return new FCmpInst(P, X, Zero, "", I); + }; + + switch (I.getPredicate()) { + case FCmpInst::FCMP_OLT: + // fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 + return replaceFCmp(&I, FCmpInst::FCMP_OEQ, X); + case FCmpInst::FCMP_UGE: + // fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0 + return replaceFCmp(&I, FCmpInst::FCMP_UNE, X); + case FCmpInst::FCMP_OGE: + // fcmp oge fabs(x), 
smallest_normalized_number -> fcmp one x, 0.0 + return replaceFCmp(&I, FCmpInst::FCMP_ONE, X); + case FCmpInst::FCMP_ULT: + // fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0 + return replaceFCmp(&I, FCmpInst::FCMP_UEQ, X); + default: + break; + } + } + + return nullptr; + } + auto replacePredAndOp0 = [&IC](FCmpInst *I, FCmpInst::Predicate P, Value *X) { I->setPredicate(P); return IC.replaceOperand(*I, 0, X); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 3f1bcea3727f5..cc1dedf372752 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -167,6 +167,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *visitInsertValueInst(InsertValueInst &IV); Instruction *visitInsertElementInst(InsertElementInst &IE); Instruction *visitExtractElementInst(ExtractElementInst &EI); + Instruction *simplifyBinOpSplats(ShuffleVectorInst &SVI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); Instruction *visitExtractValueInst(ExtractValueInst &EV); Instruction *visitLandingPadInst(LandingPadInst &LI); @@ -369,6 +370,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final Instruction *foldExtractOfOverflowIntrinsic(ExtractValueInst &EV); Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II); Instruction *foldFPSignBitOps(BinaryOperator &I); + Instruction *foldFDivConstantDivisor(BinaryOperator &I); // Optimize one of these forms: // and i1 Op, SI / select i1 Op, i1 SI, i1 false (if IsAnd = true) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index e4fccda750e6c..96275302e86d9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -140,7 +140,7 @@ static Value *foldMulSelectToNegate(BinaryOperator &I, return nullptr; 
} -/// Reduce integer multiplication patterns that contain a (1 << Z) factor. +/// Reduce integer multiplication patterns that contain a (+/-1 << Z) factor. /// Callers are expected to call this twice to handle commuted patterns. static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands, InstCombiner::BuilderTy &Builder) { @@ -171,6 +171,17 @@ static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands, return Builder.CreateAdd(Shl, FrX, Mul.getName(), HasNUW, PropagateNSW); } + // Similar to above, but a decrement of the shifted value is disguised as + // 'not' and becomes a sub: + // X * (~(-1 << Z)) --> X * ((1 << Z) - 1) --> (X << Z) - X + // This increases uses of X, so it may require a freeze, but that is still + // expected to be an improvement because it removes the multiply. + if (match(Y, m_OneUse(m_Not(m_OneUse(m_Shl(m_AllOnes(), m_Value(Z))))))) { + Value *FrX = Builder.CreateFreeze(X, X->getName() + ".fr"); + Value *Shl = Builder.CreateShl(FrX, Z, "mulshl"); + return Builder.CreateSub(Shl, FrX, Mul.getName()); + } + return nullptr; } @@ -1392,7 +1403,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { } /// Remove negation and try to convert division into multiplication. 
-static Instruction *foldFDivConstantDivisor(BinaryOperator &I) { +Instruction *InstCombinerImpl::foldFDivConstantDivisor(BinaryOperator &I) { Constant *C; if (!match(I.getOperand(1), m_Constant(C))) return nullptr; @@ -1404,6 +1415,17 @@ static Instruction *foldFDivConstantDivisor(BinaryOperator &I) { if (Constant *NegC = ConstantFoldUnaryOpOperand(Instruction::FNeg, C, DL)) return BinaryOperator::CreateFDivFMF(X, NegC, &I); + // nnan X / +0.0 -> copysign(inf, X) + if (I.hasNoNaNs() && match(I.getOperand(1), m_Zero())) { + IRBuilder<> B(&I); + // TODO: nnan nsz X / -0.0 -> copysign(inf, X) + CallInst *CopySign = B.CreateIntrinsic( + Intrinsic::copysign, {C->getType()}, + {ConstantFP::getInfinity(I.getType()), I.getOperand(0)}, &I); + CopySign->takeName(&I); + return replaceInstUsesWith(I, CopySign); + } + // If the constant divisor has an exact inverse, this is always safe. If not, // then we can still create a reciprocal if fast-math-flags allow it and the // constant is a regular number (not zero, infinite, or denormal). diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 5df7459e49851..f4ad343a614e4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2784,17 +2784,16 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { if (Res && *Res == false) return replaceOperand(SI, 1, A); } - // select c, true, (select a, b, false) -> select c, true, a - // select (select a, b, false), true, c -> select a, true, c + // select c, true, (a && b) -> select c, true, a + // select (a && b), true, c -> select a, true, c // if c = false implies that b = true - // FIXME: This should use m_LogicalAnd instead of matching a select operand. 
if (match(TrueVal, m_One()) && - match(FalseVal, m_Select(m_Value(A), m_Value(B), m_Zero()))) { + match(FalseVal, m_LogicalAnd(m_Value(A), m_Value(B)))) { Optional Res = isImpliedCondition(CondVal, B, DL, false); if (Res && *Res == true) return replaceOperand(SI, 2, A); } - if (match(CondVal, m_Select(m_Value(A), m_Value(B), m_Zero())) && + if (match(CondVal, m_LogicalAnd(m_Value(A), m_Value(B))) && match(TrueVal, m_One())) { Optional Res = isImpliedCondition(FalseVal, B, DL, false); if (Res && *Res == true) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index d50918629ba5c..5964c96619a6c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2598,6 +2598,35 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { return new ShuffleVectorInst(X, Y, NewMask); } +// Splatting the first element of the result of a BinOp, where any of the +// BinOp's operands are the result of a first element splat can be simplified to +// splatting the first element of the result of the BinOp +Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) { + if (!match(SVI.getOperand(1), m_Undef()) || + !match(SVI.getShuffleMask(), m_ZeroMask())) + return nullptr; + + Value *Op0 = SVI.getOperand(0); + Value *X, *Y; + if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()), + m_Value(Y))) && + !match(Op0, m_BinOp(m_Value(X), + m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask())))) + return nullptr; + if (X->getType() != Y->getType()) + return nullptr; + + auto *BinOp = cast(Op0); + if (!isSafeToSpeculativelyExecute(BinOp)) + return nullptr; + + Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y); + if (auto NewBOI = dyn_cast(NewBO)) + NewBOI->copyIRFlags(BinOp); + + return new ShuffleVectorInst(NewBO, SVI.getShuffleMask()); +} + Instruction 
*InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -2606,7 +2635,9 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { SVI.getType(), ShufQuery)) return replaceInstUsesWith(SVI, V); - // Bail out for scalable vectors + if (Instruction *I = simplifyBinOpSplats(SVI)) + return I; + if (isa(LHS->getType())) return nullptr; diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8cbe29a4c4ef2..ff05454aa920e 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -106,6 +106,7 @@ static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29; static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; +static const uint64_t kLoongArch64_ShadowOffset64 = 1ULL << 46; static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; @@ -396,12 +397,12 @@ static cl::opt ClForceExperiment( static cl::opt ClUsePrivateAlias("asan-use-private-alias", cl::desc("Use private aliases for global variables"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); static cl::opt ClUseOdrIndicator("asan-use-odr-indicator", cl::desc("Use odr indicators to improve ODR reporting"), - cl::Hidden, cl::init(false)); + cl::Hidden, cl::init(true)); static cl::opt ClUseGlobalsGC("asan-globals-live-support", @@ -484,6 +485,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, bool IsMIPS64 = TargetTriple.isMIPS64(); bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; + bool IsLoongArch64 = 
TargetTriple.getArch() == Triple::loongarch64; bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; bool IsWindows = TargetTriple.isOSWindows(); bool IsFuchsia = TargetTriple.isOSFuchsia(); @@ -555,6 +557,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, Mapping.Offset = kDynamicShadowSentinel; else if (IsAArch64) Mapping.Offset = kAArch64_ShadowOffset64; + else if (IsLoongArch64) + Mapping.Offset = kLoongArch64_ShadowOffset64; else if (IsRISCV64) Mapping.Offset = kRISCV64_ShadowOffset64; else if (IsAMDGPU) @@ -573,12 +577,12 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, } // OR-ing shadow offset if more efficient (at least on x86) if the offset - // is a power of two, but on ppc64 we have to use add since the shadow - // offset is not necessary 1/8-th of the address space. On SystemZ, - // we could OR the constant in a single instruction, but it's more + // is a power of two, but on ppc64 and loongarch64 we have to use add since + // the shadow offset is not necessarily 1/8-th of the address space. On + // SystemZ, we could OR the constant in a single instruction, but it's more // efficient to load it once and use indexed addressing. Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS && - !IsRISCV64 && + !IsRISCV64 && !IsLoongArch64 && !(Mapping.Offset & (Mapping.Offset - 1)) && Mapping.Offset != kDynamicShadowSentinel; bool IsAndroidWithIfuncSupport = @@ -767,15 +771,19 @@ class ModuleAddressSanitizer { public: ModuleAddressSanitizer(Module &M, bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true, - bool UseOdrIndicator = false, + bool UseOdrIndicator = true, AsanDtorKind DestructorKind = AsanDtorKind::Global) : CompileKernel(ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel), Recover(ClRecover.getNumOccurrences() > 0 ? 
ClRecover : Recover), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC && !this->CompileKernel), // Enable aliases as they should have no downside with ODR indicators. - UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias), - UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator), + UsePrivateAlias(ClUsePrivateAlias.getNumOccurrences() > 0 + ? ClUsePrivateAlias + : UseOdrIndicator), + UseOdrIndicator(ClUseOdrIndicator.getNumOccurrences() > 0 + ? ClUseOdrIndicator + : UseOdrIndicator), // Not a typo: ClWithComdat is almost completely pointless without // ClUseGlobalsGC (because then it only works on modules without // globals, which are rare); it is a prerequisite for ClUseGlobalsGC; diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index a84d2c4836d11..3c724c81c643b 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1234,19 +1234,22 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, // Initialize DataFlowSanitizer runtime functions and declare them in the module void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) { + LLVMContext &C = M.getContext(); { AttributeList AL; - AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind); - AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly); - AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt); + AL = AL.addFnAttribute(C, Attribute::NoUnwind); + AL = AL.addFnAttribute( + C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly())); + AL = AL.addRetAttribute(C, Attribute::ZExt); DFSanUnionLoadFn = Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL); } { AttributeList AL; - AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind); - AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly); - AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt); + AL = AL.addFnAttribute(C, 
Attribute::NoUnwind); + AL = AL.addFnAttribute( + C, Attribute::getWithMemoryEffects(C, MemoryEffects::readOnly())); + AL = AL.addRetAttribute(C, Attribute::ZExt); DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction( "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL); } @@ -1470,8 +1473,8 @@ bool DataFlowSanitizer::runImpl( } } - ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone); + // TODO: This could be more precise. + ReadOnlyNoneAttrs.addAttribute(Attribute::Memory); // First, change the ABI of every function in the module. ABI-listed // functions keep their original ABI and get a wrapper function. diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 6490a8d543276..cb4d1b6a34e2c 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -4074,12 +4074,9 @@ struct MemorySanitizerVisitor : public InstVisitor { // will become a non-readonly function after it is instrumented by us. To // prevent this code from being optimized out, mark that function // non-readonly in advance. + // TODO: We can likely do better than dropping memory() completely here. AttributeMask B; - B.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone) - .addAttribute(Attribute::WriteOnly) - .addAttribute(Attribute::ArgMemOnly) - .addAttribute(Attribute::Speculatable); + B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable); Call->removeFnAttrs(B); if (Function *Func = Call->getCalledFunction()) { @@ -5769,13 +5766,9 @@ bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) { MemorySanitizerVisitor Visitor(F, *this, TLI); - // Clear out readonly/readnone attributes. + // Clear out memory attributes. 
AttributeMask B; - B.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone) - .addAttribute(Attribute::WriteOnly) - .addAttribute(Attribute::ArgMemOnly) - .addAttribute(Attribute::Speculatable); + B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable); F.removeFnAttrs(B); return Visitor.runOnFunction(); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index d627c3eb5d803..5b5f88d78b3b2 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -302,12 +302,17 @@ static cl::opt PGOTraceFuncHash( static cl::opt PGOFunctionSizeThreshold( "pgo-function-size-threshold", cl::Hidden, - cl::desc("Do not instrument functions smaller than this threshold")); + cl::desc("Do not instrument functions smaller than this threshold.")); static cl::opt MatchMemProf( "pgo-match-memprof", cl::init(true), cl::Hidden, cl::desc("Perform matching and annotation of memprof profiles.")); +static cl::opt PGOFunctionCriticalEdgeThreshold( + "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden, + cl::desc("Do not instrument functions with the number of critical edges " + " greater than this threshold.")); + namespace llvm { // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts @@ -1846,6 +1851,38 @@ static void collectComdatMembers( ComdatMembers.insert(std::make_pair(C, &GA)); } +// Don't perform PGO instrumentation / profile-use. +static bool skipPGO(const Function &F) { + if (F.isDeclaration()) + return true; + if (F.hasFnAttribute(llvm::Attribute::NoProfile)) + return true; + if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) + return true; + if (F.getInstructionCount() < PGOFunctionSizeThreshold) + return true; + + // If there are too many critical edges, PGO might cause + // compile-time problems.
Skip PGO if the number of + // critical edges exceeds the threshold. + unsigned NumCriticalEdges = 0; + for (auto &BB : F) { + const Instruction *TI = BB.getTerminator(); + for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { + if (isCriticalEdge(TI, I)) + NumCriticalEdges++; + } + } + if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) { + LLVM_DEBUG(dbgs() << "In func " << F.getName() + << ", NumCriticalEdges=" << NumCriticalEdges + << " exceed the threshold. Skip PGO.\n"); + return true; + } + + return false; +} + static bool InstrumentAllFunctions( Module &M, function_ref LookupTLI, function_ref LookupBPI, @@ -1858,13 +1895,7 @@ static bool InstrumentAllFunctions( collectComdatMembers(M, ComdatMembers); for (auto &F : M) { - if (F.isDeclaration()) - continue; - if (F.hasFnAttribute(llvm::Attribute::NoProfile)) - continue; - if (F.hasFnAttribute(llvm::Attribute::SkipProfile)) - continue; - if (F.getInstructionCount() < PGOFunctionSizeThreshold) + if (skipPGO(F)) continue; auto &TLI = LookupTLI(F); auto *BPI = LookupBPI(F); @@ -2092,7 +2123,7 @@ static bool annotateAllFunctions( if (PGOInstrumentEntry.getNumOccurrences() > 0) InstrumentFuncEntry = PGOInstrumentEntry; for (auto &F : M) { - if (F.isDeclaration()) + if (skipPGO(F)) continue; auto &TLI = LookupTLI(F); auto *BPI = LookupBPI(F); diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index 07c03ee2049ac..267446bddcf5f 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -291,9 +291,9 @@ bool MemOPSizeOpt::perform(MemOp MO) { uint64_t SavedRemainCount = SavedTotalCount; SmallVector SizeIds; SmallVector CaseCounts; + SmallDenseSet SeenSizeId; uint64_t MaxCount = 0; unsigned Version = 0; - int64_t LastV = -1; // Default case is in the front -- save the slot here.
CaseCounts.push_back(0); SmallVector RemainingVDs; @@ -316,15 +316,12 @@ bool MemOPSizeOpt::perform(MemOp MO) { break; } - if (V == LastV) { - LLVM_DEBUG(dbgs() << "Invalid Profile Data in Function " << Func.getName() - << ": Two consecutive, identical values in MemOp value" - "counts.\n"); + if (!SeenSizeId.insert(V).second) { + errs() << "Invalid Profile Data in Function " << Func.getName() + << ": Two identical values in MemOp value counts.\n"; return false; } - LastV = V; - SizeIds.push_back(V); CaseCounts.push_back(C); if (C > MaxCount) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 8b9076aff8fa9..bd214e004a022 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -748,6 +748,7 @@ void State::addInfoFor(BasicBlock &BB) { static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { LLVM_DEBUG(dbgs() << "Checking " << *Cmp << "\n"); + CmpInst::Predicate Pred = Cmp->getPredicate(); Value *A = Cmp->getOperand(0); Value *B = Cmp->getOperand(1); @@ -771,7 +772,6 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { }); bool Changed = false; - LLVMContext &Ctx = Cmp->getModule()->getContext(); if (CSToUse.isConditionImplied(R.Coefficients)) { if (!DebugCounter::shouldExecute(EliminatedCounter)) return false; @@ -780,7 +780,9 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { dbgs() << "Condition " << *Cmp << " implied by dominating constraints\n"; dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); }); - Cmp->replaceUsesWithIf(ConstantInt::getTrue(Ctx), [](Use &U) { + Constant *TrueC = + ConstantInt::getTrue(CmpInst::makeCmpResultType(Cmp->getType())); + Cmp->replaceUsesWithIf(TrueC, [](Use &U) { // Conditions in an assume trivially simplify to true. Skip uses // in assume calls to not destroy the available information. 
auto *II = dyn_cast(U.getUser()); @@ -797,7 +799,9 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { dbgs() << "Condition !" << *Cmp << " implied by dominating constraints\n"; dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); }); - Cmp->replaceAllUsesWith(ConstantInt::getFalse(Ctx)); + Constant *FalseC = + ConstantInt::getFalse(CmpInst::makeCmpResultType(Cmp->getType())); + Cmp->replaceAllUsesWith(FalseC); NumCondsRemoved++; Changed = true; } diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index ad47fd7c6e9d1..ea887d982b1b7 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -340,18 +340,16 @@ static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) { /// exploiting range information. static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) { Value *Op0 = Cmp->getOperand(0); - auto *C = dyn_cast(Cmp->getOperand(1)); - if (!C) - return false; - + Value *Op1 = Cmp->getOperand(1); LazyValueInfo::Tristate Result = - LVI->getPredicateAt(Cmp->getPredicate(), Op0, C, Cmp, + LVI->getPredicateAt(Cmp->getPredicate(), Op0, Op1, Cmp, /*UseBlockValue=*/true); if (Result == LazyValueInfo::Unknown) return false; ++NumCmps; - Constant *TorF = ConstantInt::get(Type::getInt1Ty(Cmp->getContext()), Result); + Constant *TorF = + ConstantInt::get(CmpInst::makeCmpResultType(Op0->getType()), Result); Cmp->replaceAllUsesWith(TorF); Cmp->eraseFromParent(); return true; diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index a489a890f6641..1bc5123ded321 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2828,17 +2828,10 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) { NumWithout = 2; break; } - // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and - // when CurInst has operand 
defined in CurrentBlock (so it may be defined - // by phi in the loop header). + // It is not safe to do PRE when P->CurrentBlock is a loop backedge. assert(BlockRPONumber.count(P) && BlockRPONumber.count(CurrentBlock) && "Invalid BlockRPONumber map."); - if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] && - llvm::any_of(CurInst->operands(), [&](const Use &U) { - if (auto *Inst = dyn_cast(U.get())) - return Inst->getParent() == CurrentBlock; - return false; - })) { + if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock]) { NumWithout = 2; break; } diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 9efb40f231e40..fdc1232ad4fa6 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1281,7 +1281,7 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { MadeAnyChanges = true; ToMove->moveBefore(*ExitBlock, InsertPt); - SE->forgetBlockAndLoopDispositions(ToMove); + SE->forgetValue(ToMove); if (Done) break; InsertPt = ToMove->getIterator(); } @@ -1308,7 +1308,8 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken, } static void replaceLoopPHINodesWithPreheaderValues( - LoopInfo *LI, Loop *L, SmallVectorImpl &DeadInsts) { + LoopInfo *LI, Loop *L, SmallVectorImpl &DeadInsts, + ScalarEvolution &SE) { assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!"); auto *LoopPreheader = L->getLoopPreheader(); auto *LoopHeader = L->getHeader(); @@ -1317,6 +1318,7 @@ static void replaceLoopPHINodesWithPreheaderValues( auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader); for (User *U : PN.users()) Worklist.push_back(cast(U)); + SE.forgetValue(&PN); PN.replaceAllUsesWith(PreheaderIncoming); DeadInsts.emplace_back(&PN); } @@ -1588,7 +1590,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // unconditional exit, we can still replace header phis with their // preheader value. 
if (!L->contains(BI->getSuccessor(CI->isNullValue()))) - replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); return true; } @@ -1675,7 +1677,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // the header PHIs with values coming from the preheader. if (ExitCount->isZero()) { foldExit(L, ExitingBB, true, DeadInsts); - replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts); + replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE); Changed = true; continue; } diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 3cf35172b7499..05b807b57b4e1 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -831,18 +831,26 @@ bool LoopInterchangeLegality::currentLimitations() { } Inductions.clear(); - if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) { - LLVM_DEBUG( - dbgs() << "Only inner loops with induction or reduction PHI nodes " - << "are supported currently.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with induction or reduction PHI nodes can be" - " interchange currently."; - }); - return true; + // For multi-level loop nests, make sure that all phi nodes for inner loops + // at all levels can be recognized as an induction or reduction phi. Bail out + // if a phi node at a certain nesting level cannot be properly recognized. + Loop *CurLevelLoop = OuterLoop; + while (!CurLevelLoop->getSubLoops().empty()) { + // We already made sure that the loop nest is tightly nested.
+ CurLevelLoop = CurLevelLoop->getSubLoops().front(); + if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) { + LLVM_DEBUG( + dbgs() << "Only inner loops with induction or reduction PHI nodes " + << "are supported currently.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", + CurLevelLoop->getStartLoc(), + CurLevelLoop->getHeader()) + << "Only inner loops with induction or reduction PHI nodes can be" + " interchange currently."; + }); + return true; + } } // TODO: Triangular loops are not handled for now. diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp index fbd4a39c7949e..1e4060abeb885 100644 --- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp @@ -763,17 +763,17 @@ unsigned LoopPredication::collectChecks(SmallVectorImpl &Checks, // resulting list of subconditions in Checks vector. SmallVector Worklist(1, Condition); SmallPtrSet Visited; + Visited.insert(Condition); Value *WideableCond = nullptr; do { Value *Condition = Worklist.pop_back_val(); - if (!Visited.insert(Condition).second) - continue; - Value *LHS, *RHS; using namespace llvm::PatternMatch; if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) { - Worklist.push_back(LHS); - Worklist.push_back(RHS); + if (Visited.insert(LHS).second) + Worklist.push_back(LHS); + if (Visited.insert(RHS).second) + Worklist.push_back(RHS); continue; } diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 387b71da43737..2040b032d0cc4 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -331,16 +331,27 @@ void MemCpyOptPass::eraseInstruction(Instruction *I) { } // Check for mod or ref of Loc between Start and End, excluding both boundaries. -// Start and End must be in the same block +// Start and End must be in the same block. 
+// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start +// intrinsic and store it inside SkippedLifetimeStart. static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, - const MemoryUseOrDef *End) { + const MemoryUseOrDef *End, + Instruction **SkippedLifetimeStart = nullptr) { assert(Start->getBlock() == End->getBlock() && "Only local supported"); for (const MemoryAccess &MA : make_range(++Start->getIterator(), End->getIterator())) { - if (isModOrRefSet(AA.getModRefInfo(cast(MA).getMemoryInst(), - Loc))) + Instruction *I = cast(MA).getMemoryInst(); + if (isModOrRefSet(AA.getModRefInfo(I, Loc))) { + auto *II = dyn_cast(I); + if (II && II->getIntrinsicID() == Intrinsic::lifetime_start && + SkippedLifetimeStart && !*SkippedLifetimeStart) { + *SkippedLifetimeStart = I; + continue; + } + return true; + } } return false; } @@ -913,12 +924,21 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, // Check that nothing touches the dest of the copy between // the call and the store/memcpy. + Instruction *SkippedLifetimeStart = nullptr; if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(cpyStore))) { + MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) { LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n"); return false; } + // Code below tries to move the lifetime marker before "C". Check the + // correctness of this motion. + if (SkippedLifetimeStart && SkippedLifetimeStart->getNumOperands() == 3) { + auto *LiveI = cast(SkippedLifetimeStart->getOperand(1)); + if (!DT->dominates(LiveI, C)) + return false; + } + // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. 
@@ -1094,6 +1114,12 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad, cast(cpyDest)->setAlignment(srcAlign); } + if (SkippedLifetimeStart) { + SkippedLifetimeStart->moveBefore(C); + MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart), + MSSA->getMemoryAccess(C)); + } + // Update AA metadata // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be // handled here, but combineMetadata doesn't support them yet diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 689a2a286cb9f..6ad3bc9a69155 100644 --- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -80,10 +80,9 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc, Instruction *LibCall = Call->clone(); Builder.Insert(LibCall); - // Add attribute "readnone" so that backend can use a native sqrt instruction - // for this call. - Call->removeFnAttr(Attribute::WriteOnly); - Call->addFnAttr(Attribute::ReadNone); + // Add memory(none) attribute, so that the backend can use a native sqrt + // instruction for this call. + Call->setDoesNotAccessMemory(); // Insert a FP compare instruction and use it as the CurrBB branch condition. Builder.SetInsertPoint(CurrBBTerm); diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 68cf9cc77cc48..12fd2e677909f 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1430,10 +1430,7 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, // machine model for purposes of optimization. We have to strip these on // both function declarations and call sites. 
static constexpr Attribute::AttrKind FnAttrsToStrip[] = - {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly, - Attribute::ArgMemOnly, Attribute::InaccessibleMemOnly, - Attribute::InaccessibleMemOrArgMemOnly, - Attribute::NoSync, Attribute::NoFree}; + {Attribute::Memory, Attribute::NoSync, Attribute::NoFree}; // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 8f90c2b8ba1a7..fe1c632c85caa 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -590,21 +590,28 @@ bool llvm::runIPSCCP( } } - // If we replaced an argument, the argmemonly and - // inaccessiblemem_or_argmemonly attributes do not hold any longer. Remove - // them from both the function and callsites. + // If we replaced an argument, we may now also access a global (currently + // classified as "other" memory). Update memory attribute to reflect this. 
if (ReplacedPointerArg) { - AttributeMask AttributesToRemove; - AttributesToRemove.addAttribute(Attribute::ArgMemOnly); - AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); - F.removeFnAttrs(AttributesToRemove); - + auto UpdateAttrs = [&](AttributeList AL) { + MemoryEffects ME = AL.getMemoryEffects(); + if (ME == MemoryEffects::unknown()) + return AL; + + ME |= MemoryEffects(MemoryEffects::Other, + ME.getModRef(MemoryEffects::ArgMem)); + return AL.addFnAttribute( + F.getContext(), + Attribute::getWithMemoryEffects(F.getContext(), ME)); + }; + + F.setAttributes(UpdateAttrs(F.getAttributes())); for (User *U : F.users()) { auto *CB = dyn_cast(U); if (!CB || CB->getCalledFunction() != &F) continue; - CB->removeFnAttrs(AttributesToRemove); + CB->setAttributes(UpdateAttrs(CB->getAttributes())); } } MadeChanges |= ReplacedPointerArg; diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index de7e0500a97ca..f7ecc47947151 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -541,6 +541,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, else // Forget the entire nest as this exits the entire nest. SE->forgetTopmostLoop(&L); + SE->forgetBlockAndLoopDispositions(); } if (MSSAU && VerifyMemorySSA) @@ -2766,7 +2767,8 @@ static bool collectUnswitchCandidates( if (CollectGuards) for (auto &I : *BB) if (isGuard(&I)) { - auto *Cond = cast(&I)->getArgOperand(0); + auto *Cond = + skipTrivialSelect(cast(&I)->getArgOperand(0)); // TODO: Support AND, OR conditions and partial unswitching. 
if (!isa(Cond) && L.isLoopInvariant(Cond)) UnswitchCandidates.push_back({&I, {Cond}}); diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 2adf172f6b98a..81d151c2904e8 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -856,7 +856,12 @@ BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, Func, Insert); FlowSet.insert(Flow); - TermDL[Flow] = TermDL[Dominator]; + + // use a temporary variable to avoid a use-after-free if the map's storage is + // reallocated + DebugLoc DL = TermDL[Dominator]; + TermDL[Flow] = std::move(DL); + DT->addNewBlock(Flow, Dominator); ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); return Flow; diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index f1d1d0d4f1ef3..5fb4ee1cac524 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -75,11 +75,6 @@ static bool setOnlyReadsMemory(Function &F) { static bool setOnlyWritesMemory(Function &F) { if (F.onlyWritesMemory()) // writeonly or readnone return false; - // Turn readonly and writeonly into readnone. - if (F.hasFnAttribute(Attribute::ReadOnly)) { - F.removeFnAttr(Attribute::ReadOnly); - return setDoesNotAccessMemory(F); - } ++NumWriteOnly; F.setOnlyWritesMemory(); return true; diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 23b943c69386d..fba9dbb2c9c27 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -904,24 +904,18 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // Those attributes cannot be propagated safely. Explicitly list them // here so we get a warning if new attributes are added. 
case Attribute::AllocSize: - case Attribute::ArgMemOnly: case Attribute::Builtin: case Attribute::Convergent: - case Attribute::InaccessibleMemOnly: - case Attribute::InaccessibleMemOrArgMemOnly: case Attribute::JumpTable: case Attribute::Naked: case Attribute::NoBuiltin: case Attribute::NoMerge: case Attribute::NoReturn: case Attribute::NoSync: - case Attribute::ReadNone: - case Attribute::ReadOnly: case Attribute::ReturnsTwice: case Attribute::Speculatable: case Attribute::StackAlignment: case Attribute::WillReturn: - case Attribute::WriteOnly: case Attribute::AllocKind: case Attribute::PresplitCoroutine: case Attribute::Memory: @@ -984,6 +978,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::NoUndef: case Attribute::NonNull: case Attribute::Preallocated: + case Attribute::ReadNone: + case Attribute::ReadOnly: case Attribute::Returned: case Attribute::SExt: case Attribute::StructRet: @@ -993,6 +989,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::ZExt: case Attribute::ImmArg: case Attribute::ByRef: + case Attribute::WriteOnly: // These are not really attributes. case Attribute::None: case Attribute::EndAttrKinds: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index c007a5990f337..636392ae810b7 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -594,7 +594,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } // Use a map to unique and a vector to guarantee deterministic ordering. 
- llvm::SmallDenseSet, 4> DeadDebugSet; + llvm::SmallDenseSet DeadDebugSet; llvm::SmallVector DeadDebugInst; if (ExitBlock) { @@ -623,11 +623,8 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, auto *DVI = dyn_cast(&I); if (!DVI) continue; - auto Key = - DeadDebugSet.find({DVI->getVariable(), DVI->getExpression()}); - if (Key != DeadDebugSet.end()) + if (!DeadDebugSet.insert(DebugVariable(DVI)).second) continue; - DeadDebugSet.insert({DVI->getVariable(), DVI->getExpression()}); DeadDebugInst.push_back(DVI); } diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index a3455577a35c5..79d4ab9803b61 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -1042,11 +1042,8 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) { auto V1State = getValueState(Op1); auto V2State = getValueState(Op2); - Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State); + Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State, DL); if (C) { - // TODO: getCompare() currently has incorrect handling for unknown/undef. - if (isa(C)) - return; ValueLatticeElement CV; CV.markConstant(C); mergeInValue(&I, CV); diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index fcdd85838340d..ee7f8b2b1f7e9 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2635,6 +2635,34 @@ static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) { return Changed; } +namespace { +/// Track ephemeral values, which should be ignored for cost-modelling +/// purposes. Requires walking instructions in reverse order. 
+class EphemeralValueTracker { + SmallPtrSet EphValues; + + bool isEphemeral(const Instruction *I) { + if (isa(I)) + return true; + return !I->mayHaveSideEffects() && !I->isTerminator() && + all_of(I->users(), [&](const User *U) { + return EphValues.count(cast(U)); + }); + } + +public: + bool track(const Instruction *I) { + if (isEphemeral(I)) { + EphValues.insert(I); + return true; + } + return false; + } + + bool contains(const Instruction *I) const { return EphValues.contains(I); } +}; +} // namespace + /// Determine if we can hoist sink a sole store instruction out of a /// conditional block. /// @@ -2859,13 +2887,11 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, unsigned SpeculatedInstructions = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; - for (BasicBlock::iterator BBI = ThenBB->begin(), - BBE = std::prev(ThenBB->end()); - BBI != BBE; ++BBI) { - Instruction *I = &*BBI; + EphemeralValueTracker EphTracker; + for (Instruction &I : reverse(drop_end(*ThenBB))) { // Skip debug info. if (isa(I)) { - SpeculatedDbgIntrinsics.push_back(I); + SpeculatedDbgIntrinsics.push_back(&I); continue; } @@ -2877,10 +2903,14 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // the samples collected on the non-conditional path are counted towards // the conditional path. We leave it for the counts inference algorithm to // figure out a proper count for an unknown probe. - SpeculatedDbgIntrinsics.push_back(I); + SpeculatedDbgIntrinsics.push_back(&I); continue; } + // Ignore ephemeral values, they will be dropped by the transform. + if (EphTracker.track(&I)) + continue; + // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; @@ -2888,23 +2918,23 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, return false; // Don't hoist the instruction if it's unsafe or expensive. 
- if (!isSafeToSpeculativelyExecute(I) && + if (!isSafeToSpeculativelyExecute(&I) && !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( - I, BB, ThenBB, EndBB)))) + &I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - computeSpeculationCost(I, TTI) > + computeSpeculationCost(&I, TTI) > PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) return false; // Store the store speculation candidate. if (SpeculatedStoreValue) - SpeculatedStore = cast(I); + SpeculatedStore = cast(&I); // Do not hoist the instruction if any of its operands are defined but not // used in BB. The transformation will prevent the operand from // being sunk into the use block. - for (Use &Op : I->operands()) { + for (Use &Op : I.operands()) { Instruction *OpI = dyn_cast(Op); if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects()) continue; // Not a candidate for sinking. @@ -2956,10 +2986,16 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, // be misleading while debugging. // Similarly strip attributes that maybe dependent on condition we are // hoisting above. - for (auto &I : *ThenBB) { + for (auto &I : make_early_inc_range(*ThenBB)) { if (!SpeculatedStoreValue || &I != SpeculatedStore) I.setDebugLoc(DebugLoc()); I.dropUndefImplyingAttrsAndUnknownMetadata(); + + // Drop ephemeral values. + if (EphTracker.contains(&I)) { + I.replaceAllUsesWith(PoisonValue::get(I.getType())); + I.eraseFromParent(); + } } // Hoist the instructions. @@ -3002,15 +3038,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, /// Return true if we can thread a branch across this block. 
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { int Size = 0; - - SmallPtrSet EphValues; - auto IsEphemeral = [&](const Instruction *I) { - if (isa(I)) - return true; - return !I->mayHaveSideEffects() && !I->isTerminator() && - all_of(I->users(), - [&](const User *U) { return EphValues.count(U); }); - }; + EphemeralValueTracker EphTracker; // Walk the loop in reverse so that we can identify ephemeral values properly // (values only feeding assumes). @@ -3021,11 +3049,9 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { return false; // Ignore ephemeral values which are deleted during codegen. - if (IsEphemeral(&I)) - EphValues.insert(&I); // We will delete Phis while threading, so Phis should not be accounted in // block's size. - else if (!isa(I)) { + if (!EphTracker.track(&I) && !isa(I)) { if (Size++ > MaxSmallBlockSize) return false; // Don't clone large BB's. } diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 7922f785c338a..9b5d0b8f5daa7 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1856,7 +1856,6 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) { Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { Module *M = Pow->getModule(); Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); Type *Ty = Pow->getType(); bool Ignored; @@ -1881,8 +1880,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { LibFunc LibFn; Function *CalleeFn = BaseFn->getCalledFunction(); - if (CalleeFn && - TLI->getLibFunc(CalleeFn->getName(), LibFn) && + if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) && isLibFuncEmittable(M, TLI, LibFn)) { StringRef ExpName; Intrinsic::ID ID; @@ -1892,14 +1890,18 @@ Value 
*LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { switch (LibFn) { default: return nullptr; - case LibFunc_expf: case LibFunc_exp: case LibFunc_expl: + case LibFunc_expf: + case LibFunc_exp: + case LibFunc_expl: ExpName = TLI->getName(LibFunc_exp); ID = Intrinsic::exp; LibFnFloat = LibFunc_expf; LibFnDouble = LibFunc_exp; LibFnLongDouble = LibFunc_expl; break; - case LibFunc_exp2f: case LibFunc_exp2: case LibFunc_exp2l: + case LibFunc_exp2f: + case LibFunc_exp2: + case LibFunc_exp2l: ExpName = TLI->getName(LibFunc_exp2); ID = Intrinsic::exp2; LibFnFloat = LibFunc_exp2f; @@ -1932,6 +1934,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { if (!match(Pow->getArgOperand(0), m_APFloat(BaseF))) return nullptr; + AttributeList NoAttrs; // Attributes are only meaningful on the original call + // pow(2.0, itofp(x)) -> ldexp(1.0, x) if (match(Base, m_SpecificFP(2.0)) && (isa(Expo) || isa(Expo)) && @@ -1940,7 +1944,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { return copyFlags(*Pow, emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), ExpoI, TLI, LibFunc_ldexp, LibFunc_ldexpf, - LibFunc_ldexpl, B, Attrs)); + LibFunc_ldexpl, B, NoAttrs)); } // pow(2.0 ** n, x) -> exp2(n * x) @@ -1964,7 +1968,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { else return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs)); + LibFunc_exp2l, B, NoAttrs)); } } @@ -1974,7 +1978,7 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { hasFloatFn(M, TLI, Ty, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l)) return copyFlags(*Pow, emitUnaryFloatFnCall(Expo, TLI, LibFunc_exp10, LibFunc_exp10f, LibFunc_exp10l, - B, Attrs)); + B, NoAttrs)); // pow(x, y) -> exp2(log2(x) * y) if (Pow->hasApproxFunc() && Pow->hasNoNaNs() && BaseF->isFiniteNonZero() && @@ -2000,7 +2004,7 @@ Value 
*LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { LibFunc_exp2l)) return copyFlags(*Pow, emitUnaryFloatFnCall(FMul, TLI, LibFunc_exp2, LibFunc_exp2f, - LibFunc_exp2l, B, Attrs)); + LibFunc_exp2l, B, NoAttrs)); } } @@ -2032,7 +2036,6 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, /// Use square root in place of pow(x, +/-0.5). Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); Type *Ty = Pow->getType(); @@ -2054,7 +2057,8 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { !isKnownNeverInfinity(Base, TLI)) return nullptr; - Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI); + Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B, + TLI); if (!Sqrt) return nullptr; @@ -2160,8 +2164,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { return nullptr; ExpoF = &ExpoI; - Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(), - Pow->doesNotAccessMemory(), M, B, TLI); + Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), M, + B, TLI); if (!Sqrt) return nullptr; } @@ -2205,7 +2209,6 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { Module *M = CI->getModule(); Function *Callee = CI->getCalledFunction(); - AttributeList Attrs; // Attributes are only meaningful on the original call StringRef Name = Callee->getName(); Value *Ret = nullptr; if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) && @@ -2215,14 +2218,14 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { Type *Ty = CI->getType(); Value *Op = CI->getArgOperand(0); - // Turn exp2(sitofp(x)) -> 
ldexp(1.0, sext(x)) if sizeof(x) <= IntSize - // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize + // exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize + // exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize if ((isa(Op) || isa(Op)) && hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) { if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, - LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, - B, Attrs); + LibFunc_ldexp, LibFunc_ldexpf, + LibFunc_ldexpl, B, AttributeList()); } return Ret; @@ -2260,7 +2263,6 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Function *LogFn = Log->getCalledFunction(); - AttributeList Attrs; // Attributes are only meaningful on the original call StringRef LogNm = LogFn->getName(); Intrinsic::ID LogID = LogFn->getIntrinsicID(); Module *Mod = Log->getModule(); @@ -2371,12 +2373,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { TLI->getLibFunc(*Arg, ArgLb); // log(pow(x,y)) -> y*log(x) + AttributeList NoAttrs; if (ArgLb == PowLb || ArgID == Intrinsic::pow) { Value *LogX = Log->doesNotAccessMemory() ? B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), Arg->getOperand(0), "log") - : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, Attrs); + : emitUnaryFloatFnCall(Arg->getOperand(0), TLI, LogNm, B, NoAttrs); Value *MulY = B.CreateFMul(Arg->getArgOperand(1), LogX, "mul"); // Since pow() may have side effects, e.g. errno, // dead code elimination may not be trusted to remove it. @@ -2399,7 +2402,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Value *LogE = Log->doesNotAccessMemory() ? 
B.CreateCall(Intrinsic::getDeclaration(Mod, LogID, Ty), Eul, "log") - : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, Attrs); + : emitUnaryFloatFnCall(Eul, TLI, LogNm, B, NoAttrs); Value *MulY = B.CreateFMul(Arg->getArgOperand(0), LogE, "mul"); // Since exp() may have side effects, e.g. errno, // dead code elimination may not be trusted to remove it. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7983165f09842..0a01b5f90182e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6561,10 +6561,9 @@ Optional LoopVectorizationCostModel::getReductionPatternCost( return None; RetI = RetI->user_back(); } - if (match(RetI, m_Mul(m_Value(), m_Value())) && + + if (match(RetI, m_OneUse(m_Mul(m_Value(), m_Value()))) && RetI->user_back()->getOpcode() == Instruction::Add) { - if (!RetI->hasOneUser()) - return None; RetI = RetI->user_back(); } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index be3f560ead854..ac2397beb4926 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3795,49 +3795,6 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) { return None; } -/// Check if two insertelement instructions are from the same buildvector. -static bool areTwoInsertFromSameBuildVector( - InsertElementInst *VU, InsertElementInst *V, - function_ref GetBaseOperand) { - // Instructions must be from the same basic blocks. - if (VU->getParent() != V->getParent()) - return false; - // Checks if 2 insertelements are from the same buildvector. - if (VU->getType() != V->getType()) - return false; - // Multiple used inserts are separate nodes. 
- if (!VU->hasOneUse() && !V->hasOneUse()) - return false; - auto *IE1 = VU; - auto *IE2 = V; - unsigned Idx1 = *getInsertIndex(IE1); - unsigned Idx2 = *getInsertIndex(IE2); - // Go through the vector operand of insertelement instructions trying to find - // either VU as the original vector for IE2 or V as the original vector for - // IE1. - do { - if (IE2 == VU) - return VU->hasOneUse(); - if (IE1 == V) - return V->hasOneUse(); - if (IE1) { - if ((IE1 != VU && !IE1->hasOneUse()) || - getInsertIndex(IE1).value_or(Idx2) == Idx2) - IE1 = nullptr; - else - IE1 = dyn_cast_or_null(GetBaseOperand(IE1)); - } - if (IE2) { - if ((IE2 != V && !IE2->hasOneUse()) || - getInsertIndex(IE2).value_or(Idx1) == Idx1) - IE2 = nullptr; - else - IE2 = dyn_cast_or_null(GetBaseOperand(IE2)); - } - } while (IE1 || IE2); - return false; -} - Optional BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // No need to reorder if need to shuffle reuses, still need to shuffle the @@ -3901,58 +3858,6 @@ Optional BoUpSLP::getReorderingData(const TreeEntry &TE, (TopToBottom && isa(TE.getMainOp()))) && !TE.isAltShuffle()) return TE.ReorderIndices; - if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) { - auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) { - if (V1->user_empty() || V2->user_empty()) - return false; - auto *FirstUserOfPhi1 = cast(*V1->user_begin()); - auto *FirstUserOfPhi2 = cast(*V2->user_begin()); - if (auto *IE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *IE2 = dyn_cast(FirstUserOfPhi2)) { - if (!areTwoInsertFromSameBuildVector( - IE1, IE2, - [](InsertElementInst *II) { return II->getOperand(0); })) - return false; - Optional Idx1 = getInsertIndex(IE1); - Optional Idx2 = getInsertIndex(IE2); - if (Idx1 == None || Idx2 == None) - return false; - return *Idx1 < *Idx2; - } - if (auto *EE1 = dyn_cast(FirstUserOfPhi1)) - if (auto *EE2 = dyn_cast(FirstUserOfPhi2)) { - if (EE1->getOperand(0) != EE2->getOperand(0)) - return false; - Optional Idx1 
= getExtractIndex(EE1); - Optional Idx2 = getExtractIndex(EE2); - if (Idx1 == None || Idx2 == None) - return false; - return *Idx1 < *Idx2; - } - return false; - }; - auto IsIdentityOrder = [](const OrdersType &Order) { - for (unsigned Idx : seq(0, Order.size())) - if (Idx != Order[Idx]) - return false; - return true; - }; - if (!TE.ReorderIndices.empty()) - return TE.ReorderIndices; - DenseMap PhiToId; - SmallVector Phis; - OrdersType ResOrder(TE.Scalars.size()); - for (unsigned Id = 0, Sz = TE.Scalars.size(); Id < Sz; ++Id) { - PhiToId[TE.Scalars[Id]] = Id; - Phis.push_back(TE.Scalars[Id]); - } - llvm::stable_sort(Phis, PHICompare); - for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id) - ResOrder[Id] = PhiToId[Phis[Id]]; - if (IsIdentityOrder(ResOrder)) - return {}; - return ResOrder; - } if (TE.State == TreeEntry::NeedToGather) { // TODO: add analysis of other gather nodes with extractelement // instructions and other values/instructions, not only undefs. @@ -3996,7 +3901,7 @@ static bool isRepeatedNonIdentityClusteredMask(ArrayRef Mask, ArrayRef FirstCluster = Mask.slice(0, Sz); if (ShuffleVectorInst::isIdentityMask(FirstCluster)) return false; - for (unsigned I = 0, E = Mask.size(); I < E; I += Sz) { + for (unsigned I = Sz, E = Mask.size(); I < E; I += Sz) { ArrayRef Cluster = Mask.slice(I, Sz); if (Cluster != FirstCluster) return false; @@ -4017,10 +3922,10 @@ void BoUpSLP::reorderNodeWithReuses(TreeEntry &TE, ArrayRef Mask) const { // Try to improve gathered nodes with clustered reuses, if possible. reorderScalars(TE.Scalars, makeArrayRef(TE.ReuseShuffleIndices).slice(0, Sz)); // Fill the reuses mask with the identity submasks. 
- for (auto It = TE.ReuseShuffleIndices.begin(), - End = TE.ReuseShuffleIndices.end(); + for (auto *It = TE.ReuseShuffleIndices.begin(), + *End = TE.ReuseShuffleIndices.end(); It != End; std::advance(It, Sz)) - std::iota(It, std::next(It + Sz), 0); + std::iota(It, std::next(It, Sz), 0); } void BoUpSLP::reorderTopToBottom() { @@ -4030,9 +3935,6 @@ void BoUpSLP::reorderTopToBottom() { // their ordering. DenseMap GathersToOrders; - // Phi nodes can have preferred ordering based on their result users - DenseMap PhisToOrders; - // AltShuffles can also have a preferred ordering that leads to fewer // instructions, e.g., the addsub instruction in x86. DenseMap AltShufflesToOrders; @@ -4047,7 +3949,7 @@ void BoUpSLP::reorderTopToBottom() { // extracts. for_each(VectorizableTree, [this, &TTIRef, &VFToOrderedEntries, &GathersToOrders, &ExternalUserReorderMap, - &AltShufflesToOrders, &PhisToOrders]( + &AltShufflesToOrders]( const std::unique_ptr &TE) { // Look for external users that will probably be vectorized. SmallVector ExternalUserReorderIndices = @@ -4104,9 +4006,6 @@ void BoUpSLP::reorderTopToBottom() { VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get()); if (TE->State != TreeEntry::Vectorize || !TE->ReuseShuffleIndices.empty()) GathersToOrders.try_emplace(TE.get(), *CurrentOrder); - if (TE->State == TreeEntry::Vectorize && - TE->getOpcode() == Instruction::PHI) - PhisToOrders.try_emplace(TE.get(), *CurrentOrder); } }); @@ -4132,8 +4031,8 @@ void BoUpSLP::reorderTopToBottom() { if (!OpTE->ReuseShuffleIndices.empty() && !GathersToOrders.count(OpTE)) continue; // Count number of orders uses. 
- const auto &Order = [OpTE, &GathersToOrders, &AltShufflesToOrders, - &PhisToOrders]() -> const OrdersType & { + const auto &Order = [OpTE, &GathersToOrders, + &AltShufflesToOrders]() -> const OrdersType & { if (OpTE->State == TreeEntry::NeedToGather || !OpTE->ReuseShuffleIndices.empty()) { auto It = GathersToOrders.find(OpTE); @@ -4145,12 +4044,6 @@ void BoUpSLP::reorderTopToBottom() { if (It != AltShufflesToOrders.end()) return It->second; } - if (OpTE->State == TreeEntry::Vectorize && - isa(OpTE->getMainOp())) { - auto It = PhisToOrders.find(OpTE); - if (It != PhisToOrders.end()) - return It->second; - } return OpTE->ReorderIndices; }(); // First consider the order of the external scalar users. @@ -7245,6 +7138,51 @@ InstructionCost BoUpSLP::getSpillCost() const { return Cost; } +/// Check if two insertelement instructions are from the same buildvector. +static bool areTwoInsertFromSameBuildVector( + InsertElementInst *VU, InsertElementInst *V, + function_ref GetBaseOperand) { + // Instructions must be from the same basic blocks. + if (VU->getParent() != V->getParent()) + return false; + // Checks if 2 insertelements are from the same buildvector. + if (VU->getType() != V->getType()) + return false; + // Multiple used inserts are separate nodes. + if (!VU->hasOneUse() && !V->hasOneUse()) + return false; + auto *IE1 = VU; + auto *IE2 = V; + Optional Idx1 = getInsertIndex(IE1); + Optional Idx2 = getInsertIndex(IE2); + if (Idx1 == None || Idx2 == None) + return false; + // Go through the vector operand of insertelement instructions trying to find + // either VU as the original vector for IE2 or V as the original vector for + // IE1. 
+ do { + if (IE2 == VU) + return VU->hasOneUse(); + if (IE1 == V) + return V->hasOneUse(); + if (IE1) { + if ((IE1 != VU && !IE1->hasOneUse()) || + getInsertIndex(IE1).value_or(*Idx2) == *Idx2) + IE1 = nullptr; + else + IE1 = dyn_cast_or_null(GetBaseOperand(IE1)); + } + if (IE2) { + if ((IE2 != V && !IE2->hasOneUse()) || + getInsertIndex(IE2).value_or(*Idx1) == *Idx1) + IE2 = nullptr; + else + IE2 = dyn_cast_or_null(GetBaseOperand(IE2)); + } + } while (IE1 || IE2); + return false; +} + /// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the /// buildvector sequence. static bool isFirstInsertElement(const InsertElementInst *IE1, diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index a21add2d47256..bac72b8fa8ebb 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -127,6 +127,27 @@ class VectorCombine { }; } // namespace +static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) { + // Do not widen load if atomic/volatile or under asan/hwasan/memtag/tsan. + // The widened load may load data from dirty regions or create data races + // non-existent in the source. + if (!Load || !Load->isSimple() || !Load->hasOneUse() || + Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) || + mustSuppressSpeculation(*Load)) + return false; + + // We are potentially transforming byte-sized (8-bit) memory accesses, so make + // sure we have all of our type-based constraints in place for this target. + Type *ScalarTy = Load->getType()->getScalarType(); + uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits(); + unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth(); + if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 || + ScalarSize % 8 != 0) + return false; + + return true; +} + bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // Match insert into fixed vector of scalar value. 
// TODO: Handle non-zero insert index. @@ -142,35 +163,22 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { if (!HasExtract) X = Scalar; - // Match source value as load of scalar or vector. - // Do not vectorize scalar load (widening) if atomic/volatile or under - // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions - // or create data races non-existent in the source. auto *Load = dyn_cast(X); - if (!Load || !Load->isSimple() || !Load->hasOneUse() || - Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) || - mustSuppressSpeculation(*Load)) + if (!canWidenLoad(Load, TTI)) return false; - const DataLayout &DL = I.getModule()->getDataLayout(); - Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts(); - assert(isa(SrcPtr->getType()) && "Expected a pointer type"); - - unsigned AS = Load->getPointerAddressSpace(); - - // We are potentially transforming byte-sized (8-bit) memory accesses, so make - // sure we have all of our type-based constraints in place for this target. Type *ScalarTy = Scalar->getType(); uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits(); unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth(); - if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 || - ScalarSize % 8 != 0) - return false; // Check safety of replacing the scalar load with a larger vector load. // We use minimal alignment (maximum flexibility) because we only care about // the dereferenceable region. When calculating cost and creating a new op, // we may use a larger value based on alignment attributes. 
+ const DataLayout &DL = I.getModule()->getDataLayout(); + Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts(); + assert(isa(SrcPtr->getType()) && "Expected a pointer type"); + unsigned MinVecNumElts = MinVectorSize / ScalarSize; auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); unsigned OffsetEltIndex = 0; @@ -215,6 +223,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // Use the greater of the alignment on the load or its source pointer. Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment); Type *LoadTy = Load->getType(); + unsigned AS = Load->getPointerAddressSpace(); InstructionCost OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS); APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); diff --git a/llvm/test/Analysis/BasicAA/cs-cs-arm.ll b/llvm/test/Analysis/BasicAA/cs-cs-arm.ll index d6a9976590778..6bf321b09201a 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs-arm.ll +++ b/llvm/test/Analysis/BasicAA/cs-cs-arm.ll @@ -1,8 +1,5 @@ ; RUN: opt < %s -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s ; REQUIRES: arm-registered-target -; This hasn't been run in a long time and it no longer matches reality. -; Filed issue #58738. 
-; XFAIL: * target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "arm-apple-ios" @@ -17,16 +14,18 @@ entry: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind %c = add <8 x i16> %a, %b + load i8, i8* %p + load i8, i8* %q ret <8 x i16> %c ; CHECK-LABEL: Function: test1: ; CHECK: NoAlias: i8* %p, i8* %q -; CHECK: Just Ref (MustAlias): Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Both ModRef (MustAlias): Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Just Ref (MustAlias): Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) +; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) ; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) ; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #{{[0-9]+}} <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) diff --git a/llvm/test/Analysis/BasicAA/cs-cs.ll b/llvm/test/Analysis/BasicAA/cs-cs.ll index 3a0cd5947e2a5..513f305b3a19e 100644 --- a/llvm/test/Analysis/BasicAA/cs-cs.ll +++ 
b/llvm/test/Analysis/BasicAA/cs-cs.ll @@ -429,19 +429,19 @@ entry: } -; CHECK: attributes #0 = { argmemonly nocallback nofree nounwind willreturn writeonly } -; CHECK-NEXT: attributes #1 = { argmemonly nocallback nofree nounwind willreturn } -; CHECK-NEXT: attributes #2 = { argmemonly nosync nounwind willreturn } -; CHECK-NEXT: attributes #3 = { noinline nounwind readonly } -; CHECK-NEXT: attributes #4 = { noinline nounwind writeonly } +; CHECK: attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CHECK-NEXT: attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK-NEXT: attributes #2 = { nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK-NEXT: attributes #3 = { noinline nounwind memory(read) } +; CHECK-NEXT: attributes #4 = { noinline nounwind memory(write) } ; CHECK-NEXT: attributes #5 = { nounwind ssp } -; CHECK-NEXT: attributes #6 = { inaccessiblememonly nounwind } -; CHECK-NEXT: attributes #7 = { inaccessiblemem_or_argmemonly nounwind } -; CHECK-NEXT: attributes #8 = { argmemonly nounwind } -; CHECK-NEXT: attributes #9 = { readonly } -; CHECK-NEXT: attributes #10 = { inaccessiblememonly } -; CHECK-NEXT: attributes #11 = { inaccessiblemem_or_argmemonly } -; CHECK-NEXT: attributes #12 = { argmemonly } +; CHECK-NEXT: attributes #6 = { nounwind memory(inaccessiblemem: readwrite) } +; CHECK-NEXT: attributes #7 = { nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK-NEXT: attributes #8 = { nounwind memory(argmem: readwrite) } +; CHECK-NEXT: attributes #9 = { memory(read) } +; CHECK-NEXT: attributes #10 = { memory(inaccessiblemem: readwrite) } +; CHECK-NEXT: attributes #11 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK-NEXT: attributes #12 = { memory(argmem: readwrite) } attributes #0 = { argmemonly nounwind } attributes #1 = { noinline nounwind readonly } diff --git a/llvm/test/Analysis/BasicAA/intrinsics-arm.ll 
b/llvm/test/Analysis/BasicAA/intrinsics-arm.ll index d43445c7a50c4..3fda58721e663 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics-arm.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics-arm.ll @@ -26,6 +26,6 @@ entry: declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind -; CHECK: attributes #0 = { argmemonly nocallback nofree nosync nounwind readonly willreturn } -; CHECK: attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Analysis/BasicAA/intrinsics.ll b/llvm/test/Analysis/BasicAA/intrinsics.ll index 46e9f4e06c2f2..3965286215e3d 100644 --- a/llvm/test/Analysis/BasicAA/intrinsics.ll +++ b/llvm/test/Analysis/BasicAA/intrinsics.ll @@ -22,6 +22,6 @@ entry: declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) nounwind readonly declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind -; CHECK: attributes #0 = { argmemonly nocallback nofree nosync nounwind readonly willreturn } -; CHECK: attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/llvm/test/Analysis/BasicAA/pure-const-dce.ll b/llvm/test/Analysis/BasicAA/pure-const-dce.ll index 8d07084561732..c6b8b0669ab23 100644 --- a/llvm/test/Analysis/BasicAA/pure-const-dce.ll +++ b/llvm/test/Analysis/BasicAA/pure-const-dce.ll @@ -50,5 +50,5 @@ declare i32 @TestPure(i32) readonly declare i32 
@TestNone(i32) -; CHECK: attributes [[READNONE]] = { readnone } -; CHECK: attributes [[READONLY]] = { readonly } +; CHECK: attributes [[READNONE]] = { memory(none) } +; CHECK: attributes [[READONLY]] = { memory(read) } diff --git a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll index 779f3ca505d58..680ced82c0d64 100644 --- a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll @@ -3,23 +3,23 @@ define void @fixed() { ; CHECK-LABEL: 'fixed' -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 8, <2 x i1> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 8, <4 x i1> undef, <4 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 8, <8 x i1> undef, <8 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 8, <16 x i1> undef, <16 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 8, <2 x i1> undef, <2 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 8, <4 x i1> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 8, <8 x i1> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, 
i32 8, <2 x i1> undef, <2 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 8, <4 x i1> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 8, <2 x i1> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0v2i8(<2 x i8>* undef, i32 8, <2 x i1> undef, <2 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* undef, i32 8, <4 x i1> undef, <4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 8, <8 x i1> undef, <8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 8, <16 x i1> undef, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0v2i16(<2 x i16>* undef, i32 8, <2 x i1> undef, <2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 8, <4 x i1> undef, <4 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 8, <8 x i1> undef, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 8, <2 x i1> undef, <2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call <4 x i32> 
@llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 8, <4 x i1> undef, <4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* undef, i32 8, <2 x i1> undef, <2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0v2f16(<2 x half>* undef, i32 8, <2 x i1> undef, <2 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0v4f16(<4 x half>* undef, i32 8, <4 x i1> undef, <4 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* undef, i32 8, <8 x i1> undef, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 8, <2 x i1> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 8, <4 x i1> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 8, <2 x i1> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 8, <4 x i1> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* undef, i32 8, <2 x i1> undef, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* undef, i32 8, <4 x i1> undef, <4 x float> 
undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* undef, i32 8, <2 x i1> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* undef, i32 8, <4 x i1> undef, <4 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0v32f16(<32 x half>* undef, i32 8, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll index 0833c3516d287..12d7fba69f487 100644 --- a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll @@ -1,15 +1,18 @@ ; RUN: opt -mtriple amdgcn-- -passes='print' -disable-output %s 2>&1 | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst -define i32 @test1(i32* %ptr, i32 %val) #0 { +define amdgpu_kernel void @test1(i32* %ptr, i32 %val) #0 { %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst - ret i32 %orig + store i32 %orig, i32* %ptr + ret void } ; CHECK: DIVERGENT: %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst -define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) { +define amdgpu_kernel void @test2(i32* %ptr, i32 %cmp, i32 %new) { %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst - ret {i32, i1} %orig + %val = extractvalue { i32, i1 } %orig, 0 + store i32 %val, i32* %ptr + ret void } ; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false) @@ -41,5 +44,14 @@ declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, declare i32 
@llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1 declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1 +; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %val) +define amdgpu_kernel void @test_atomic_csub_i32(i32 addrspace(1)* %ptr, i32 %val) #0 { + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %val) + store i32 %ret, i32 addrspace(1)* %ptr, align 4 + ret void +} + +declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #1 + attributes #0 = { nounwind } -attributes #1 = { nounwind argmemonly } +attributes #1 = { argmemonly nounwind willreturn } diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll new file mode 100644 index 0000000000000..91e5d588710ab --- /dev/null +++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/read_register.ll @@ -0,0 +1,142 @@ +; RUN: opt -mtriple amdgcn-unknown-amdhsa -mcpu=gfx90a -passes='print' -disable-output %s 2>&1 | FileCheck %s + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec': +; CHECK-NOT: DIVERGENT +define i64 @read_register_exec() { + %reg = call i64 @llvm.read_register.i64(metadata !0) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_m0': +; CHECK-NOT: DIVERGENT +define i32 @read_register_m0() { + %reg = call i32 @llvm.read_register.i32(metadata !1) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_s17': +; CHECK-NOT: DIVERGENT +define i32 @read_register_s17() { + %reg = call i32 @llvm.read_register.i32(metadata !2) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_s17_i17': +; CHECK-NOT: DIVERGENT +define i17 @read_register_s17_i17() { + %reg = call i17 @llvm.read_register.i17(metadata !2) + ret i17 %reg +} + +; CHECK-LABEL: 
Divergence Analysis' for function 'read_register_v0': +; CHECK: DIVERGENT +define i32 @read_register_v0() { + %reg = call i32 @llvm.read_register.i32(metadata !3) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_v0_v1': +; CHECK: DIVERGENT +define i64 @read_register_v0_v1() { + %reg = call i64 @llvm.read_register.i64(metadata !4) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_a0': +; CHECK: DIVERGENT +define i32 @read_register_a0() { + %reg = call i32 @llvm.read_register.i32(metadata !5) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_a0_a1': +; CHECK: DIVERGENT +define i64 @read_register_a0_a1() { + %reg = call i64 @llvm.read_register.i64(metadata !6) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_i64': +; CHECK-NOT: DIVERGENT +define i64 @read_register_vcc_i64() { + %reg = call i64 @llvm.read_register.i64(metadata !7) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_i1': +; CHECK: DIVERGENT +define i1 @read_register_vcc_i1() { + %reg = call i1 @llvm.read_register.i1(metadata !7) + ret i1 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_invalid_reg': +; CHECK-NOT: DIVERGENT +define i64 @read_register_invalid_reg() { + %reg = call i64 @llvm.read_register.i64(metadata !8) + ret i64 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_flat_scratch': +; CHECK-NOT: DIVERGENT +define i32 @read_register_flat_scratch() { + %reg = call i32 @llvm.read_register.i32(metadata !9) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_lo_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_vcc_lo_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !10) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_vcc_hi_i32': +; CHECK-NOT: DIVERGENT +define i32 
@read_register_vcc_hi_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !11) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec_lo_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_exec_lo_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !12) + ret i32 %reg +} + +; CHECK-LABEL: Divergence Analysis' for function 'read_register_exec_hi_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_exec_hi_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !13) + ret i32 %reg +} + +; FIXME: Why does the verifier allow this? +; CHECK-LABEL: Divergence Analysis' for function 'read_register_empty_str_i32': +; CHECK-NOT: DIVERGENT +define i32 @read_register_empty_str_i32() { + %reg = call i32 @llvm.read_register.i32(metadata !14) + ret i32 %reg +} + +declare i64 @llvm.read_register.i64(metadata) +declare i32 @llvm.read_register.i32(metadata) +declare i17 @llvm.read_register.i17(metadata) +declare i1 @llvm.read_register.i1(metadata) + +!0 = !{!"exec"} +!1 = !{!"m0"} +!2 = !{!"s17"} +!3 = !{!"v0"} +!4 = !{!"v[0:1]"} +!5 = !{!"a0"} +!6 = !{!"a[0:1]"} +!7 = !{!"vcc"} +!8 = !{!"not a register"} +!9 = !{!"flat_scratch"} +!10 = !{!"vcc_lo"} +!11 = !{!"vcc_hi"} +!12 = !{!"exec_lo"} +!13 = !{!"exec_hi"} +!14 = !{!""} diff --git a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll index dfb54c8f97dce..e6f2385ba130a 100644 --- a/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll +++ b/llvm/test/Analysis/LegacyDivergenceAnalysis/AMDGPU/atomics.ll @@ -1,15 +1,18 @@ ; RUN: opt -mtriple=amdgcn-- -amdgpu-use-legacy-divergence-analysis -passes='print' 2>&1 -disable-output %s | FileCheck %s ; CHECK: DIVERGENT: %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst -define i32 @test1(i32* %ptr, i32 %val) #0 { +define amdgpu_kernel void @test1(i32* %ptr, i32 %val) #0 { %orig = atomicrmw xchg i32* %ptr, i32 %val seq_cst - ret i32 %orig + store 
i32 %orig, i32* %ptr + ret void } ; CHECK: DIVERGENT: %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst -define {i32, i1} @test2(i32* %ptr, i32 %cmp, i32 %new) { +define amdgpu_kernel void @test2(i32* %ptr, i32 %cmp, i32 %new) { %orig = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst - ret {i32, i1} %orig + %val = extractvalue { i32, i1 } %orig, 0 + store i32 %val, i32* %ptr + ret void } ; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val, i32 0, i32 0, i1 false) @@ -41,5 +44,14 @@ declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #1 declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #1 +; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %val) +define amdgpu_kernel void @test_atomic_csub_i32(i32 addrspace(1)* %ptr, i32 %val) #0 { + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %val) + store i32 %ret, i32 addrspace(1)* %ptr, align 4 + ret void +} + +declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #1 + attributes #0 = { nounwind } -attributes #1 = { nounwind argmemonly } +attributes #1 = { argmemonly nounwind willreturn } diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll index 6134f578895bc..7f8f7e8c6d662 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -72,14 +72,14 @@ define i32 @test3_no(i8* %p) nounwind { declare void @callee(i32* %p) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind -; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #1 = { 
argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly } -; CHECK: attributes #2 = { nofree nosync nounwind readnone } +; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CHECK: attributes #2 = { nofree nosync nounwind memory(none) } ; CHECK: attributes #3 = { nounwind } -; CHECK: attributes #4 = { mustprogress nofree nosync nounwind readnone willreturn } -; CHECK: attributes #5 = { argmemonly mustprogress nofree nosync nounwind willreturn } -; CHECK: attributes #6 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn } -; CHECK: attributes #7 = { argmemonly nocallback nofree nounwind willreturn } +; CHECK: attributes #4 = { mustprogress nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #5 = { mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #6 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #7 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; Root note. 
!0 = !{ } diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll index 42166584a78a4..f3b4b1abd2168 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll @@ -22,8 +22,8 @@ entry: declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>) nounwind readonly declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>) nounwind -; CHECK: attributes #0 = { argmemonly nocallback nofree nosync nounwind readonly willreturn } -; CHECK: attributes #1 = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } ; CHECK: attributes [[NUW]] = { nounwind } !0 = !{!"tbaa root"} diff --git a/llvm/test/Assembler/aarch64-intrinsics-attributes.ll b/llvm/test/Assembler/aarch64-intrinsics-attributes.ll index cbf5de3d934a8..40e7789b4e44c 100644 --- a/llvm/test/Assembler/aarch64-intrinsics-attributes.ll +++ b/llvm/test/Assembler/aarch64-intrinsics-attributes.ll @@ -23,4 +23,4 @@ declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) declare @llvm.aarch64.sve.dup.nxv4i32(, , i32) ; CHECK: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { nofree nounwind willreturn } -; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes [[NO_CALLBACK_NOFREE_NOSYNC_NOUNWIND_READNONE_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll b/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll index afa90ec76a8f1..c3a6dec809301 100644 --- 
a/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll +++ b/llvm/test/Assembler/masked-load-store-intrinsics-attributes.ll @@ -15,6 +15,6 @@ declare <16 x float> @llvm.masked.expandload.v16f32 (ptr, <16 x i1>, <16 x float ; CHECK: declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr nocapture, <8 x i1>) [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY:#[0-9]+]] declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>) -; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly nocallback nofree nosync nounwind readonly willreturn } -; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY]] = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } -; CHECK: attributes [[NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { nocallback nofree nosync nounwind readonly willreturn } +; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes [[ARGMEMONLY_NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_WILLRETURN_WRITEONLY]] = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } +; CHECK: attributes [[NOCALLBACK_NOFREE_NOSYNC_NOUNWIND_READONLY_WILLRETURN]] = { nocallback nofree nosync nounwind willreturn memory(read) } diff --git a/llvm/test/Bindings/llvm-c/debug_info.ll b/llvm/test/Bindings/llvm-c/debug_info.ll index 874cf818dca29..a7fcd8a999ef1 100644 --- a/llvm/test/Bindings/llvm-c/debug_info.ll +++ b/llvm/test/Bindings/llvm-c/debug_info.ll @@ -12,13 +12,13 @@ ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata !41, metadata !DIExpression(DW_OP_constu, 0, DW_OP_stack_value)), !dbg !44 ; CHECK-NEXT: } -; CHECK: ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +; CHECK: ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn 
memory(none) ; CHECK-NEXT: declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 -; CHECK: ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +; CHECK: ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; CHECK-NEXT: declare void @llvm.dbg.value(metadata, metadata, metadata) #0 -; CHECK: attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: !llvm.dbg.cu = !{!0} ; CHECK-NEXT: !FooType = !{!28} diff --git a/llvm/test/Bitcode/attributes-3.3.ll b/llvm/test/Bitcode/attributes-3.3.ll index 6a645fbaed79d..f9aef5d2f612d 100644 --- a/llvm/test/Bitcode/attributes-3.3.ll +++ b/llvm/test/Bitcode/attributes-3.3.ll @@ -213,8 +213,8 @@ define void @f34() ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } -; CHECK: attributes #2 = { readnone } -; CHECK: attributes #3 = { readonly } +; CHECK: attributes #2 = { memory(none) } +; CHECK: attributes #3 = { memory(read) } ; CHECK: attributes #4 = { noinline } ; CHECK: attributes #5 = { alwaysinline } ; CHECK: attributes #6 = { optsize } diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index f8d31722acf07..7d42ae9b8a073 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -540,8 +540,8 @@ define void @f88() skipprofile { ret void } ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } -; CHECK: attributes #2 = { readnone } -; CHECK: attributes #3 = { readonly } +; CHECK: attributes #2 = { memory(none) } +; CHECK: attributes #3 = { memory(read) } ; CHECK: attributes #4 = { noinline } ; CHECK: attributes #5 = { alwaysinline } ; CHECK: attributes #6 = { optsize } @@ -564,13 +564,13 @@ define void @f88() skipprofile { ret void } ; CHECK: attributes #23 = { noinline optnone } ; CHECK: attributes #24 = { jumptable 
} ; CHECK: attributes #25 = { convergent } -; CHECK: attributes #26 = { argmemonly } +; CHECK: attributes #26 = { memory(argmem: readwrite) } ; CHECK: attributes #27 = { norecurse } -; CHECK: attributes #28 = { inaccessiblememonly } -; CHECK: attributes #29 = { inaccessiblemem_or_argmemonly } +; CHECK: attributes #28 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #29 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #30 = { allocsize(0) } ; CHECK: attributes #31 = { allocsize(0,1) } -; CHECK: attributes #32 = { writeonly } +; CHECK: attributes #32 = { memory(write) } ; CHECK: attributes #33 = { speculatable } ; CHECK: attributes #34 = { sanitize_hwaddress } ; CHECK: attributes #35 = { shadowcallstack } diff --git a/llvm/test/Bitcode/compatibility-3.6.ll b/llvm/test/Bitcode/compatibility-3.6.ll index edc363fc50354..932f63d80e344 100644 --- a/llvm/test/Bitcode/compatibility-3.6.ll +++ b/llvm/test/Bitcode/compatibility-3.6.ll @@ -1168,8 +1168,8 @@ define void @intrinsics.codegen() { ; CHECK: attributes #15 = { nounwind } ; CHECK: attributes #16 = { noinline optnone } ; CHECK: attributes #17 = { optsize } -; CHECK: attributes #18 = { readnone } -; CHECK: attributes #19 = { readonly } +; CHECK: attributes #18 = { memory(none) } +; CHECK: attributes #19 = { memory(read) } ; CHECK: attributes #20 = { returns_twice } ; CHECK: attributes #21 = { sanitize_address } ; CHECK: attributes #22 = { sanitize_memory } @@ -1179,12 +1179,12 @@ define void @intrinsics.codegen() { ; CHECK: attributes #26 = { sspstrong } ; CHECK: attributes #27 = { uwtable } ; CHECK: attributes #28 = { "cpu"="cortex-a8" } -; CHECK: attributes #29 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #29 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #30 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #31 = { argmemonly nounwind readonly } -; CHECK: attributes #32 = { 
argmemonly nounwind } -; CHECK: attributes #33 = { nounwind readonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #31 = { nounwind memory(argmem: read) } +; CHECK: attributes #32 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #33 = { nounwind memory(read) } +; CHECK: attributes #34 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #35 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.7.ll b/llvm/test/Bitcode/compatibility-3.7.ll index 81b0551e7045f..f32ae0a4668d5 100644 --- a/llvm/test/Bitcode/compatibility-3.7.ll +++ b/llvm/test/Bitcode/compatibility-3.7.ll @@ -1229,8 +1229,8 @@ define void @misc.metadata() { ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1242,12 +1242,12 @@ define void @misc.metadata() { ; CHECK: attributes #29 = { "thunk" } ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } -; CHECK: attributes #32 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #32 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #33 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #34 = { argmemonly nounwind readonly } -; CHECK: attributes #35 = { argmemonly nounwind } -; CHECK: attributes #36 = { nounwind readonly } -; CHECK: attributes #37 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #34 = { nounwind memory(argmem: read) } +; 
CHECK: attributes #35 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #36 = { nounwind memory(read) } +; CHECK: attributes #37 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #38 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.8.ll b/llvm/test/Bitcode/compatibility-3.8.ll index 3cab693081fe6..8e9a1a193e299 100644 --- a/llvm/test/Bitcode/compatibility-3.8.ll +++ b/llvm/test/Bitcode/compatibility-3.8.ll @@ -1536,8 +1536,8 @@ normal: ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1550,14 +1550,14 @@ normal: ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { 
nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #41 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll index 0411f5decca7f..ebb50e40d2fe7 100644 --- a/llvm/test/Bitcode/compatibility-3.9.ll +++ b/llvm/test/Bitcode/compatibility-3.9.ll @@ -1609,8 +1609,8 @@ declare void @f.writeonly() writeonly ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1623,15 +1623,15 @@ declare void @f.writeonly() writeonly ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } -; CHECK: attributes #41 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: 
attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { memory(write) } +; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #42 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll index 4ce6a436c9c49..3c3f943e7e8a4 100644 --- a/llvm/test/Bitcode/compatibility-4.0.ll +++ b/llvm/test/Bitcode/compatibility-4.0.ll @@ -1634,8 +1634,8 @@ define i8** @constexpr() { ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1648,15 +1648,15 @@ define i8** @constexpr() { ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } -; CHECK: 
attributes #41 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { memory(write) } +; CHECK: attributes #41 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #42 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll index 4a6377dd718d8..cc7e6fe0ea897 100644 --- a/llvm/test/Bitcode/compatibility-5.0.ll +++ b/llvm/test/Bitcode/compatibility-5.0.ll @@ -1649,8 +1649,8 @@ define i8** @constexpr() { ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1663,16 +1663,16 @@ define i8** @constexpr() { ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } -; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } 
+; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { memory(write) } ; CHECK: attributes #41 = { speculatable } -; CHECK: attributes #42 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #43 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll index 17ad8db81a0ab..125cfac5e9b54 100644 --- a/llvm/test/Bitcode/compatibility-6.0.ll +++ b/llvm/test/Bitcode/compatibility-6.0.ll @@ -1660,8 +1660,8 @@ define i8** @constexpr() { ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1674,16 +1674,16 @@ define i8** @constexpr() { ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync 
nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { writeonly } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { memory(write) } ; CHECK: attributes #41 = { speculatable } -; CHECK: attributes #42 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #42 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } ; CHECK: attributes #43 = { builtin } ;; Metadata diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index ac97f9d79e7ad..60c2e375f4c22 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1985,8 +1985,8 @@ declare void @f.allockind() allockind("alloc,uninitialized") ; CHECK: attributes #16 = { nounwind } ; CHECK: attributes #17 = { noinline optnone } ; CHECK: attributes #18 = { optsize } -; CHECK: attributes #19 = { readnone } -; CHECK: attributes #20 = { readonly } +; CHECK: attributes #19 = { memory(none) } +; CHECK: attributes #20 = { memory(read) } ; CHECK: attributes #21 = { returns_twice } ; CHECK: attributes #22 = { safestack } ; CHECK: attributes #23 = { sanitize_address } @@ -1999,15 +1999,15 @@ declare void @f.allockind() allockind("alloc,uninitialized") ; CHECK: attributes #30 = { uwtable } ; CHECK: attributes #31 = { "cpu"="cortex-a8" } ; CHECK: attributes #32 = { norecurse } -; CHECK: attributes #33 = { inaccessiblememonly } -; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } -; CHECK: attributes #35 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: 
attributes #33 = { memory(inaccessiblemem: readwrite) } +; CHECK: attributes #34 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #35 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes #36 = { nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #37 = { argmemonly nounwind readonly } -; CHECK: attributes #38 = { argmemonly nounwind } -; CHECK: attributes #39 = { nounwind readonly } -; CHECK: attributes #40 = { inaccessiblemem_or_argmemonly nocallback nofree nosync nounwind willreturn } -; CHECK: attributes #41 = { writeonly } +; CHECK: attributes #37 = { nounwind memory(argmem: read) } +; CHECK: attributes #38 = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #39 = { nounwind memory(read) } +; CHECK: attributes #40 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #41 = { memory(write) } ; CHECK: attributes #42 = { speculatable } ; CHECK: attributes #43 = { strictfp } ; CHECK: attributes #44 = { nosanitize_coverage } diff --git a/llvm/test/Bitcode/ptest-new.ll b/llvm/test/Bitcode/ptest-new.ll index 68d53ff3385b8..952ea795d62fb 100644 --- a/llvm/test/Bitcode/ptest-new.ll +++ b/llvm/test/Bitcode/ptest-new.ll @@ -23,4 +23,4 @@ declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone ; CHECK: attributes #0 = { nounwind } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/Bitcode/ptest-old.ll b/llvm/test/Bitcode/ptest-old.ll index a41afc0c39bf8..b09fac9c397b1 100644 --- a/llvm/test/Bitcode/ptest-old.ll +++ b/llvm/test/Bitcode/ptest-old.ll @@ -24,4 +24,4 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone declare i32 
@llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone ; CHECK: attributes #0 = { nounwind } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/Bitcode/upgrade-frame-pointer.ll b/llvm/test/Bitcode/upgrade-frame-pointer.ll index 5251db9ec2db1..f723c33d644a8 100644 --- a/llvm/test/Bitcode/upgrade-frame-pointer.ll +++ b/llvm/test/Bitcode/upgrade-frame-pointer.ll @@ -27,7 +27,7 @@ attributes #1 = { readnone "no-frame-pointer-elim"="false" "no-frame-pointer-eli ;; Other attributes (e.g. readnone) are unaffected. ; CHECK: attributes #0 = { "frame-pointer"="all" } -; CHECK: attributes #1 = { readnone "frame-pointer"="all" } +; CHECK: attributes #1 = { memory(none) "frame-pointer"="all" } ; CHECK: attributes #2 = { "frame-pointer"="non-leaf" } -; CHECK: attributes #3 = { readnone "frame-pointer"="non-leaf" } +; CHECK: attributes #3 = { memory(none) "frame-pointer"="non-leaf" } ; CHECK: attributes #4 = { "frame-pointer"="none" } diff --git a/llvm/test/Bitcode/upgrade-invariant-group-barrier.ll b/llvm/test/Bitcode/upgrade-invariant-group-barrier.ll index 3b4e9aed3b00b..b9e3c4fb5d6bb 100644 --- a/llvm/test/Bitcode/upgrade-invariant-group-barrier.ll +++ b/llvm/test/Bitcode/upgrade-invariant-group-barrier.ll @@ -13,9 +13,9 @@ define void @test(i8* %p1, i16* %p16) { ret void } -; CHECK: Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) ; CHECK: declare i8* @llvm.launder.invariant.group.p0i8(i8*) -; CHECK: Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) ; CHECK: declare i16* 
@llvm.launder.invariant.group.p0i16(i16*) declare i8* @llvm.invariant.group.barrier(i8*) declare i8* @llvm.invariant.group.barrier.p0i8(i8*) diff --git a/llvm/test/CodeGen/AArch64/aarch64-v1f32-arg.ll b/llvm/test/CodeGen/AArch64/aarch64-v1f32-arg.ll new file mode 100644 index 0000000000000..1677a7b5d013d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/aarch64-v1f32-arg.ll @@ -0,0 +1,11 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s + +define <1 x float> @f(<16 x i64> %0, <1 x float> %1) { +; CHECK-LABEL: f: +; CHECK: // %bb.0: // %BB +; CHECK-NEXT: ldr d0, [sp] +; CHECK-NEXT: ret +BB: + ret <1 x float> %1 +} diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll index 86ef69ff4e936..cf72e4b1fce9b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll +++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll @@ -964,9 +964,9 @@ entry: define i16 @test_ignored_rightbits(i32 %dst, i32 %in) { ; LLC-LABEL: test_ignored_rightbits: ; LLC: // %bb.0: -; LLC-NEXT: and w0, w0, #0x7 -; LLC-NEXT: bfi w0, w1, #3, #4 -; LLC-NEXT: bfi w0, w0, #8, #7 +; LLC-NEXT: and w8, w0, #0x7 +; LLC-NEXT: bfi w8, w1, #3, #4 +; LLC-NEXT: orr w0, w8, w8, lsl #8 ; LLC-NEXT: ret ; OPT-LABEL: @test_ignored_rightbits( ; OPT-NEXT: [[POSITIONED_FIELD:%.*]] = shl i32 [[IN:%.*]], 3 @@ -1000,8 +1000,8 @@ define void @sameOperandBFI(i64 %src, i64 %src2, i16 *%ptr) { ; LLC-NEXT: lsr x8, x0, #47 ; LLC-NEXT: and w9, w1, #0x3 ; LLC-NEXT: bfi w9, w8, #2, #2 -; LLC-NEXT: bfi w9, w9, #4, #4 -; LLC-NEXT: strh w9, [x2] +; LLC-NEXT: orr w8, w9, w9, lsl #4 +; LLC-NEXT: strh w8, [x2] ; LLC-NEXT: .LBB30_2: // %end ; LLC-NEXT: ret ; OPT-LABEL: @sameOperandBFI( diff --git a/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll index eca81e58004cb..5a44550cc172a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll 
+++ b/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll @@ -5,8 +5,8 @@ define i24 @ldi24(ptr %p) nounwind { ; CHECK-LABEL: ldi24: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #2] -; CHECK-NEXT: ldrh w0, [x0] -; CHECK-NEXT: bfi w0, w8, #16, #16 +; CHECK-NEXT: ldrh w9, [x0] +; CHECK-NEXT: orr w0, w9, w8, lsl #16 ; CHECK-NEXT: ret %r = load i24, i24* %p ret i24 %r @@ -17,9 +17,9 @@ define i56 @ldi56(ptr %p) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #6] ; CHECK-NEXT: ldrh w9, [x0, #4] -; CHECK-NEXT: ldr w0, [x0] -; CHECK-NEXT: bfi w9, w8, #16, #16 -; CHECK-NEXT: bfi x0, x9, #32, #32 +; CHECK-NEXT: ldr w10, [x0] +; CHECK-NEXT: orr w8, w9, w8, lsl #16 +; CHECK-NEXT: orr x0, x10, x8, lsl #32 ; CHECK-NEXT: ret %r = load i56, i56* %p ret i56 %r @@ -41,10 +41,10 @@ define i120 @ldi120(ptr %p) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #14] ; CHECK-NEXT: ldrh w9, [x0, #12] -; CHECK-NEXT: ldr w1, [x0, #8] +; CHECK-NEXT: ldr w10, [x0, #8] ; CHECK-NEXT: ldr x0, [x0] -; CHECK-NEXT: bfi w9, w8, #16, #16 -; CHECK-NEXT: bfi x1, x9, #32, #32 +; CHECK-NEXT: orr w8, w9, w8, lsl #16 +; CHECK-NEXT: orr x1, x10, x8, lsl #32 ; CHECK-NEXT: ret %r = load i120, i120* %p ret i120 %r @@ -55,10 +55,10 @@ define i280 @ldi280(ptr %p) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp x8, x1, [x0] ; CHECK-NEXT: ldrb w9, [x0, #34] -; CHECK-NEXT: ldrh w4, [x0, #32] +; CHECK-NEXT: ldrh w10, [x0, #32] ; CHECK-NEXT: ldp x2, x3, [x0, #16] ; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: bfi x4, x9, #16, #8 +; CHECK-NEXT: orr x4, x10, x9, lsl #16 ; CHECK-NEXT: ret %r = load i280, i280* %p ret i280 %r @@ -133,7 +133,7 @@ define void @i56_or(ptr %a) { ; CHECK-NEXT: ldrh w10, [x8, #4]! 
; CHECK-NEXT: ldrb w11, [x8, #2] ; CHECK-NEXT: orr w9, w9, #0x180 -; CHECK-NEXT: bfi w10, w11, #16, #16 +; CHECK-NEXT: orr w10, w10, w11, lsl #16 ; CHECK-NEXT: str w9, [x0] ; CHECK-NEXT: strb w11, [x8, #2] ; CHECK-NEXT: strh w10, [x8] @@ -153,7 +153,7 @@ define void @i56_and_or(ptr %a) { ; CHECK-NEXT: ldrb w11, [x8, #2] ; CHECK-NEXT: orr w9, w9, #0x180 ; CHECK-NEXT: and w9, w9, #0xffffff80 -; CHECK-NEXT: bfi w10, w11, #16, #16 +; CHECK-NEXT: orr w10, w10, w11, lsl #16 ; CHECK-NEXT: strb w11, [x8, #2] ; CHECK-NEXT: str w9, [x0] ; CHECK-NEXT: strh w10, [x8] @@ -172,11 +172,11 @@ define void @i56_insert_bit(ptr %a, i1 zeroext %bit) { ; CHECK-NEXT: ldr w11, [x0] ; CHECK-NEXT: ldrh w9, [x8, #4]! ; CHECK-NEXT: ldrb w10, [x8, #2] -; CHECK-NEXT: bfi w9, w10, #16, #8 +; CHECK-NEXT: orr w9, w9, w10, lsl #16 ; CHECK-NEXT: strb w10, [x8, #2] -; CHECK-NEXT: bfi x11, x9, #32, #24 -; CHECK-NEXT: strh w9, [x8] +; CHECK-NEXT: orr x11, x11, x9, lsl #32 ; CHECK-NEXT: and x11, x11, #0xffffffffffffdfff +; CHECK-NEXT: strh w9, [x8] ; CHECK-NEXT: orr w11, w11, w1, lsl #13 ; CHECK-NEXT: str w11, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-strict-align.ll b/llvm/test/CodeGen/AArch64/arm64-strict-align.ll index 28c158f7a2eb0..a7450349766fe 100644 --- a/llvm/test/CodeGen/AArch64/arm64-strict-align.ll +++ b/llvm/test/CodeGen/AArch64/arm64-strict-align.ll @@ -5,7 +5,7 @@ define i32 @f0(i32* nocapture %p) nounwind { ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2] ; CHECK-STRICT: ldrh [[LOW:w[0-9]+]], [x0] -; CHECK-STRICT: bfi [[LOW]], [[HIGH]], #16, #16 +; CHECK-STRICT: orr w0, [[LOW]], [[HIGH]], lsl #16 ; CHECK-STRICT: ret ; CHECK: ldr w0, [x0] @@ -16,7 +16,7 @@ define i32 @f0(i32* nocapture %p) nounwind { define i64 @f1(i64* nocapture %p) nounwind { ; CHECK-STRICT: ldp w[[LOW:[0-9]+]], w[[HIGH:[0-9]+]], [x0] -; CHECK-STRICT: bfi x[[LOW]], x[[HIGH]], #32, #32 +; CHECK-STRICT: orr x0, x[[LOW]], x[[HIGH]], lsl #32 ; CHECK-STRICT: ret ; CHECK: ldr x0, [x0] diff --git 
a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll index fbf12e80b6b53..0eb5b637b08f9 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32.ll @@ -662,8 +662,9 @@ define void @test_struct_hi(i32 %hi) nounwind { ; CHECK-LABEL: test_struct_hi: ; CHECK: mov w[[IN:[0-9]+]], w0 ; CHECK: bl _get_int -; CHECK-FAST-NEXT: mov w0, w0 -; CHECK-NEXT: bfi x0, x[[IN]], #32, #32 +; CHECK-FAST-NEXT: mov w[[DST:[0-9]+]], w0 +; CHECK-FAST-NEXT: orr x0, x[[DST]], x[[IN]], lsl #32 +; CHECK-OPT-NEXT: bfi x0, x[[IN]], #32, #32 ; CHECK-NEXT: bl _take_pair %val.64 = call i64 @get_int() %val.32 = trunc i64 %val.64 to i32 diff --git a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll index 6ee2feb2c2176..5207f2ba32d36 100644 --- a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll +++ b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll @@ -28,8 +28,8 @@ define i64 @bfis_in_loop_zero() { ; CHECK-NEXT: ldr x11, [x9, #8] ; CHECK-NEXT: and x9, x10, #0xff ; CHECK-NEXT: and x10, x0, #0xffffffff00000000 -; CHECK-NEXT: bfi x9, x8, #8, #32 -; CHECK-NEXT: bfi x10, x12, #16, #1 +; CHECK-NEXT: orr x9, x9, x8, lsl #8 +; CHECK-NEXT: orr x10, x10, x12, lsl #16 ; CHECK-NEXT: orr x0, x10, x9 ; CHECK-NEXT: ldr x9, [x11, #16] ; CHECK-NEXT: cbnz x11, .LBB0_1 @@ -97,8 +97,8 @@ define i64 @bfis_in_loop_undef() { ; CHECK-NEXT: ldr x11, [x9, #8] ; CHECK-NEXT: and x9, x10, #0xff ; CHECK-NEXT: and x10, x0, #0xffffffff00000000 -; CHECK-NEXT: bfi x9, x8, #8, #32 -; CHECK-NEXT: bfi x10, x12, #16, #1 +; CHECK-NEXT: orr x9, x9, x8, lsl #8 +; CHECK-NEXT: orr x10, x10, x12, lsl #16 ; CHECK-NEXT: orr x0, x10, x9 ; CHECK-NEXT: ldr x9, [x11, #16] ; CHECK-NEXT: cbnz x11, .LBB1_1 diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll index a27e293ffe881..b8e69d5cfaafe 100644 --- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll +++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll @@ -269,8 +269,7 @@ 
define i32 @test_nouseful_bits(i8 %a, i32 %b) { ; CHECK-NEXT: lsl w8, w8, #8 ; CHECK-NEXT: mov w9, w8 ; CHECK-NEXT: bfxil w9, w0, #0, #8 -; CHECK-NEXT: bfi w8, w9, #16, #16 -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %conv = zext i8 %a to i32 ; 0 0 0 A %shl = shl i32 %b, 8 ; B2 B1 B0 0 @@ -601,3 +600,39 @@ define i64 @test_and_extended_shift_with_imm(i64 %0) { %3 = and i64 %2, 32640 ; #0x7f80 ret i64 %3 } + +; orr with left-shifted operand is better than bfi, since it improves data +; dependency, and orr has a smaller latency and higher throughput than bfm on +; some AArch64 processors (for the rest, orr is at least as good as bfm) +; +; ubfx x8, x0, #8, #7 +; and x9, x0, #0x7f +; orr x0, x9, x8, lsl #7 +define i64 @test_orr_not_bfxil_i64(i64 %0) { +; CHECK-LABEL: test_orr_not_bfxil_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ubfx x8, x0, #8, #7 +; CHECK-NEXT: and x9, x0, #0x7f +; CHECK-NEXT: orr x0, x9, x8, lsl #7 +; CHECK-NEXT: ret + %2 = and i64 %0, 127 + %3 = lshr i64 %0, 1 + %4 = and i64 %3, 16256 ; 0x3f80 + %5 = or i64 %4, %2 + ret i64 %5 +} + +; The 32-bit test for `test_orr_not_bfxil_i64`. 
+define i32 @test_orr_not_bfxil_i32(i32 %0) { +; CHECK-LABEL: test_orr_not_bfxil_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ubfx w8, w0, #8, #7 +; CHECK-NEXT: and w9, w0, #0x7f +; CHECK-NEXT: orr w0, w9, w8, lsl #7 +; CHECK-NEXT: ret + %2 = and i32 %0, 127 + %3 = lshr i32 %0, 1 + %4 = and i32 %3, 16256 ; 0x3f80 + %5 = or i32 %4, %2 + ret i32 %5 +} diff --git a/llvm/test/CodeGen/AArch64/build-pair-isel.ll b/llvm/test/CodeGen/AArch64/build-pair-isel.ll index c9c5098017389..970a2c69343f5 100644 --- a/llvm/test/CodeGen/AArch64/build-pair-isel.ll +++ b/llvm/test/CodeGen/AArch64/build-pair-isel.ll @@ -1,13 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 -o - -O0 %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios5.0.0" ; This test checks we don't fail isel due to unhandled build_pair nodes. -; CHECK: bfi define void @compare_and_swap128() { +; CHECK-LABEL: compare_and_swap128: +; CHECK: // %bb.0: +; CHECK-NEXT: //APP +; CHECK-NEXT: nop +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: // implicit-def: $x9 +; CHECK-NEXT: mov w9, w10 +; CHECK-NEXT: mov w8, w8 +; CHECK-NEXT: // kill: def $x8 killed $w8 +; CHECK-NEXT: orr x8, x8, x9, lsl #32 +; CHECK-NEXT: // implicit-def: $x9 +; CHECK-NEXT: str x8, [x9] +; CHECK-NEXT: ret %1 = call i128 asm sideeffect "nop", "=r,~{memory}"() store i128 %1, i128* undef, align 16 ret void } - - diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll index bb37cc81a7ab1..c4481871dec49 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll @@ -19,8 +19,7 @@ define i8 @rotl_i8_const_shift(i8 %x) { ; CHECK-LABEL: rotl_i8_const_shift: ; CHECK: // %bb.0: ; CHECK-NEXT: ubfx w8, w0, #5, #3 -; CHECK-NEXT: bfi w8, w0, #3, #29 -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: orr w0, w8, w0, lsl #3 ; CHECK-NEXT: ret %f = call i8 @llvm.fshl.i8(i8 %x, 
i8 %x, i8 3) ret i8 %f diff --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll new file mode 100644 index 0000000000000..6d2f75b86011e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +; All tests are doing unordered vector comparisons on vectors larger than a +; Neon vector. + +define i1 @unordered_floating_point_compare_on_v8f32(<8 x float> %a_vec) { +; CHECK-LABEL: unordered_floating_point_compare_on_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: xtn v0.8b, v0.8h +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: ret + %a_cmp = fcmp ule <8 x float> %a_vec, zeroinitializer + %cmp_result = bitcast <8 x i1> %a_cmp to i8 + %all_zero = icmp eq i8 %cmp_result, 0 + ret i1 %all_zero +} + +define i1 @unordered_floating_point_compare_on_v16f32(<16 x float> %a_vec) { +; CHECK-LABEL: unordered_floating_point_compare_on_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0 +; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 +; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: mvn v3.16b, v3.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: ret + %a_cmp = fcmp ule <16 x float> %a_vec, 
zeroinitializer + %cmp_result = bitcast <16 x i1> %a_cmp to i16 + %all_zero = icmp eq i16 %cmp_result, 0 + ret i1 %all_zero +} + +define i1 @unordered_floating_point_compare_on_v32f32(<32 x float> %a_vec) { +; CHECK-LABEL: unordered_floating_point_compare_on_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v7.4s, v7.4s, #0.0 +; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: fcmgt v6.4s, v6.4s, #0.0 +; CHECK-NEXT: fcmgt v5.4s, v5.4s, #0.0 +; CHECK-NEXT: fcmgt v4.4s, v4.4s, #0.0 +; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0 +; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0 +; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0 +; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: mvn v7.16b, v7.16b +; CHECK-NEXT: mvn v6.16b, v6.16b +; CHECK-NEXT: mvn v5.16b, v5.16b +; CHECK-NEXT: mvn v4.16b, v4.16b +; CHECK-NEXT: mvn v3.16b, v3.16b +; CHECK-NEXT: mvn v2.16b, v2.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: uzp1 v6.8h, v6.8h, v7.8h +; CHECK-NEXT: uzp1 v4.8h, v4.8h, v5.8h +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v1.16b, v4.16b, v6.16b +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: bic w0, w9, w8 +; CHECK-NEXT: ret + %a_cmp = fcmp ule <32 x float> %a_vec, zeroinitializer + %cmp_result = bitcast <32 x i1> %a_cmp to i32 + %all_zero = icmp eq i32 %cmp_result, 0 + ret i1 %all_zero +} diff --git a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll index 43e04e341b7e1..bff4f2113df3a 100644 --- a/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll +++ b/llvm/test/CodeGen/AArch64/load-combine-big-endian.ll @@ -463,8 +463,8 @@ define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: lsl w0, w8, #8 -; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: lsl w8, w8, #8 +; 
CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 @@ -486,8 +486,8 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: lsl w0, w8, #16 -; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: lsl w8, w8, #16 +; CHECK-NEXT: orr w0, w8, w9, lsl #24 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0 @@ -527,8 +527,8 @@ define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: lsl w0, w8, #8 -; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: lsl w8, w8, #8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 @@ -550,8 +550,8 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: lsl w0, w8, #16 -; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: lsl w8, w8, #16 +; CHECK-NEXT: orr w0, w8, w9, lsl #24 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1 @@ -576,8 +576,8 @@ define i16 @load_i16_from_nonzero_offset(i8* %p) { ; CHECK-LABEL: load_i16_from_nonzero_offset: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrb w0, [x0, #2] -; CHECK-NEXT: bfi w0, w8, #8, #24 +; CHECK-NEXT: ldrb w9, [x0, #2] +; CHECK-NEXT: orr w0, w9, w8, lsl #8 ; CHECK-NEXT: ret %p1.i16 = bitcast i8* %p to i16* %p2.i8 = getelementptr i8, i8* %p, i64 2 diff --git a/llvm/test/CodeGen/AArch64/load-combine.ll b/llvm/test/CodeGen/AArch64/load-combine.ll index 293967bcec75c..de1b0f13adf0a 100644 --- a/llvm/test/CodeGen/AArch64/load-combine.ll +++ b/llvm/test/CodeGen/AArch64/load-combine.ll @@ -453,8 +453,8 @@ define i32 
@zext_load_i32_by_i8_shl_8(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: lsl w0, w8, #8 -; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: lsl w8, w8, #8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* @@ -477,8 +477,8 @@ define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: lsl w0, w8, #16 -; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: lsl w8, w8, #16 +; CHECK-NEXT: orr w0, w8, w9, lsl #24 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* @@ -521,8 +521,8 @@ define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: lsl w0, w8, #8 -; CHECK-NEXT: bfi w0, w9, #16, #8 +; CHECK-NEXT: lsl w8, w8, #8 +; CHECK-NEXT: orr w0, w8, w9, lsl #16 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* @@ -545,8 +545,8 @@ define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: lsl w0, w8, #16 -; CHECK-NEXT: bfi w0, w9, #24, #8 +; CHECK-NEXT: lsl w8, w8, #16 +; CHECK-NEXT: orr w0, w8, w9, lsl #24 ; CHECK-NEXT: ret %tmp = bitcast i32* %arg to i8* @@ -603,7 +603,7 @@ define void @short_vector_to_i32_unused_low_i8(<4 x i8>* %in, i32* %out, i32* %p ; CHECK-NEXT: umov w10, v0.h[3] ; CHECK-NEXT: lsl w8, w8, #16 ; CHECK-NEXT: bfi w8, w9, #8, #8 -; CHECK-NEXT: bfi w8, w10, #24, #8 +; CHECK-NEXT: orr w8, w8, w10, lsl #24 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %ld = load <4 x i8>, <4 x i8>* %in, align 4 @@ -634,8 +634,8 @@ define void @short_vector_to_i32_unused_high_i8(<4 x i8>* %in, i32* %out, i32* % ; CHECK-NEXT: ldrh w9, [x0] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: bfi w9, w8, #16, #8 -; CHECK-NEXT: str w9, [x1] +; CHECK-NEXT: orr w8, w9, w8, lsl #16 +; CHECK-NEXT: 
str w8, [x1] ; CHECK-NEXT: ret %ld = load <4 x i8>, <4 x i8>* %in, align 4 @@ -665,7 +665,7 @@ define void @short_vector_to_i32_unused_low_i16(<4 x i8>* %in, i32* %out, i32* % ; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: umov w9, v0.h[2] ; CHECK-NEXT: lsl w8, w8, #24 -; CHECK-NEXT: bfi w8, w9, #16, #8 +; CHECK-NEXT: orr w8, w8, w9, lsl #16 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %ld = load <4 x i8>, <4 x i8>* %in, align 4 diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll index 12c3e18317f88..ba63c4433a2a3 100644 --- a/llvm/test/CodeGen/AArch64/logic-shift.ll +++ b/llvm/test/CodeGen/AArch64/logic-shift.ll @@ -818,8 +818,7 @@ define i32 @or_fshr_wrong_shift(i32 %x, i32 %y) { ; CHECK: // %bb.0: ; CHECK-NEXT: orr w8, w0, w1 ; CHECK-NEXT: lsr w8, w8, #26 -; CHECK-NEXT: bfi w8, w0, #7, #25 -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: orr w0, w8, w0, lsl #7 ; CHECK-NEXT: ret %or1 = or i32 %x, %y %sh1 = shl i32 %x, 7 diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll index 288ba22e79289..f8ff50b6e4c54 100644 --- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll @@ -490,27 +490,27 @@ define <4 x i65> @test_ldnp_v4i65(<4 x i65>* %A) { ; ; CHECK-BE-LABEL: test_ldnp_v4i65: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldp x9, x8, [x0, #16] -; CHECK-BE-NEXT: ldp x11, x10, [x0] -; CHECK-BE-NEXT: ldrb w7, [x0, #32] -; CHECK-BE-NEXT: lsr x13, x9, #56 -; CHECK-BE-NEXT: lsr x14, x11, #56 -; CHECK-BE-NEXT: extr x15, x10, x9, #56 -; CHECK-BE-NEXT: bfi x7, x8, #8, #56 -; CHECK-BE-NEXT: extr x8, x9, x8, #56 -; CHECK-BE-NEXT: extr x12, x11, x10, #56 -; CHECK-BE-NEXT: lsr x11, x11, #59 -; CHECK-BE-NEXT: ubfx x9, x9, #57, #1 +; CHECK-BE-NEXT: ldp x10, x9, [x0, #16] +; CHECK-BE-NEXT: ldp x12, x11, [x0] +; CHECK-BE-NEXT: ldrb w8, [x0, #32] +; CHECK-BE-NEXT: lsr x13, x10, #56 +; CHECK-BE-NEXT: lsr x14, x12, #56 +; CHECK-BE-NEXT: extr x15, x11, 
x10, #56 +; CHECK-BE-NEXT: orr x7, x8, x9, lsl #8 +; CHECK-BE-NEXT: extr x8, x10, x9, #56 +; CHECK-BE-NEXT: extr x9, x12, x11, #56 +; CHECK-BE-NEXT: lsr x12, x12, #59 +; CHECK-BE-NEXT: ubfx x10, x10, #57, #1 ; CHECK-BE-NEXT: extr x5, x13, x8, #1 -; CHECK-BE-NEXT: extr x1, x14, x12, #3 -; CHECK-BE-NEXT: ubfx x12, x10, #58, #1 -; CHECK-BE-NEXT: fmov d0, x11 -; CHECK-BE-NEXT: and x11, x8, #0x1 -; CHECK-BE-NEXT: lsr x10, x10, #56 -; CHECK-BE-NEXT: fmov d2, x9 -; CHECK-BE-NEXT: fmov d1, x12 -; CHECK-BE-NEXT: extr x3, x10, x15, #2 -; CHECK-BE-NEXT: fmov d3, x11 +; CHECK-BE-NEXT: extr x1, x14, x9, #3 +; CHECK-BE-NEXT: ubfx x9, x11, #58, #1 +; CHECK-BE-NEXT: fmov d0, x12 +; CHECK-BE-NEXT: and x12, x8, #0x1 +; CHECK-BE-NEXT: lsr x11, x11, #56 +; CHECK-BE-NEXT: fmov d2, x10 +; CHECK-BE-NEXT: fmov d1, x9 +; CHECK-BE-NEXT: extr x3, x11, x15, #2 +; CHECK-BE-NEXT: fmov d3, x12 ; CHECK-BE-NEXT: mov v0.d[1], x1 ; CHECK-BE-NEXT: mov v2.d[1], x5 ; CHECK-BE-NEXT: mov v1.d[1], x3 diff --git a/llvm/test/CodeGen/AArch64/rotate-extract.ll b/llvm/test/CodeGen/AArch64/rotate-extract.ll index 9a1c6a965bf7f..20008c41c42e8 100644 --- a/llvm/test/CodeGen/AArch64/rotate-extract.ll +++ b/llvm/test/CodeGen/AArch64/rotate-extract.ll @@ -113,8 +113,8 @@ define i64 @no_extract_mul(i64 %i) nounwind { ; CHECK-LABEL: no_extract_mul: ; CHECK: // %bb.0: ; CHECK-NEXT: add x8, x0, x0, lsl #3 -; CHECK-NEXT: lsr x0, x8, #57 -; CHECK-NEXT: bfi x0, x8, #8, #56 +; CHECK-NEXT: lsr x9, x8, #57 +; CHECK-NEXT: orr x0, x9, x8, lsl #8 ; CHECK-NEXT: ret %lhs_mul = mul i64 %i, 2304 %rhs_mul = mul i64 %i, 9 diff --git a/llvm/test/CodeGen/AArch64/shift-logic.ll b/llvm/test/CodeGen/AArch64/shift-logic.ll index 9a7cf004b3b74..be1ddccf901b8 100644 --- a/llvm/test/CodeGen/AArch64/shift-logic.ll +++ b/llvm/test/CodeGen/AArch64/shift-logic.ll @@ -175,3 +175,22 @@ define i64 @desirable_to_commute2(i64* %p, i64 %i) { %r = load i64, i64* %pidx ret i64 %r } + +; Shrink demanded op will shrink the shl to i32, +; Lshr and shl will 
have different shift amount type. +; Compare apint will cause crash when type is different. +define void @apint_type_mismatch(i16 %a, i32* %p) { +; CHECK-LABEL: apint_type_mismatch: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w8, w0, #0x7f8 +; CHECK-NEXT: str w8, [x1] +; CHECK-NEXT: ret +entry: + %lshr = lshr i16 %a, 3 + %and = and i16 %lshr, 255 + %zext = zext i16 %and to i64 + %shl = shl i64 %zext, 3 + %trunc = trunc i64 %shl to i32 + store i32 %trunc, i32* %p + ret void +} diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll index 392bd243b7a4d..7b5041fc58cc9 100644 --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -236,65 +236,65 @@ exit: } define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) { -; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: LBB3_1: ; %loop -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x9, x0, x8, lsl #7 -; CHECK-NEXT: ldp q3, q2, [x9, #96] -; CHECK-NEXT: ldp q1, q0, [x9, #32] -; CHECK-NEXT: uzp1.4s v2, v3, v2 -; CHECK-NEXT: ldp q5, q4, [x9, #64] -; CHECK-NEXT: uzp1.4s v0, v1, v0 -; CHECK-NEXT: ldp q3, q6, [x9] -; CHECK-NEXT: uzp1.4s v4, v5, v4 -; CHECK-NEXT: uzp1.8h v2, v4, v2 -; CHECK-NEXT: uzp1.4s v1, v3, v6 -; CHECK-NEXT: uzp1.8h v0, v1, v0 -; CHECK-NEXT: uzp1.16b v0, v0, v2 -; CHECK-NEXT: str q0, [x1, x8, lsl #4] -; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: b.eq LBB3_1 -; CHECK-NEXT: ; %bb.2: ; %exit -; CHECK-NEXT: ret - -; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop: -; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov x8, xzr -; CHECK-BE-NEXT: .LBB3_1: // %loop -; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: add x9, x0, x8, lsl #7 -; CHECK-BE-NEXT: add x10, x9, #48 -; CHECK-BE-NEXT: add x11, x9, #32 -; CHECK-BE-NEXT: ld1 { v5.2d }, [x9] -; CHECK-BE-NEXT: ld1 { v0.2d }, [x10] -; 
CHECK-BE-NEXT: add x10, x9, #80 -; CHECK-BE-NEXT: ld1 { v1.2d }, [x11] -; CHECK-BE-NEXT: add x11, x9, #112 -; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] -; CHECK-BE-NEXT: add x10, x9, #96 -; CHECK-BE-NEXT: ld1 { v3.2d }, [x11] -; CHECK-BE-NEXT: uzp1 v0.4s, v1.4s, v0.4s -; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] -; CHECK-BE-NEXT: add x10, x9, #64 -; CHECK-BE-NEXT: add x9, x9, #16 -; CHECK-BE-NEXT: ld1 { v6.2d }, [x10] -; CHECK-BE-NEXT: ld1 { v7.2d }, [x9] -; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 -; CHECK-BE-NEXT: uzp1 v3.4s, v4.4s, v3.4s -; CHECK-BE-NEXT: add x8, x8, #1 -; CHECK-BE-NEXT: cmp x8, #1000 -; CHECK-BE-NEXT: uzp1 v2.4s, v6.4s, v2.4s -; CHECK-BE-NEXT: uzp1 v1.4s, v5.4s, v7.4s -; CHECK-BE-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b -; CHECK-BE-NEXT: st1 { v0.16b }, [x9] -; CHECK-BE-NEXT: b.eq .LBB3_1 -; CHECK-BE-NEXT: // %bb.2: // %exit -; CHECK-BE-NEXT: ret +; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: LBB3_1: ; %loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add x9, x0, x8, lsl #7 +; CHECK-NEXT: ldp q3, q2, [x9, #96] +; CHECK-NEXT: ldp q1, q0, [x9, #32] +; CHECK-NEXT: uzp1.4s v2, v3, v2 +; CHECK-NEXT: ldp q5, q4, [x9, #64] +; CHECK-NEXT: uzp1.4s v0, v1, v0 +; CHECK-NEXT: ldp q3, q6, [x9] +; CHECK-NEXT: uzp1.4s v4, v5, v4 +; CHECK-NEXT: uzp1.8h v2, v4, v2 +; CHECK-NEXT: uzp1.4s v1, v3, v6 +; CHECK-NEXT: uzp1.8h v0, v1, v0 +; CHECK-NEXT: uzp1.16b v0, v0, v2 +; CHECK-NEXT: str q0, [x1, x8, lsl #4] +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: b.eq LBB3_1 +; CHECK-NEXT: ; %bb.2: ; %exit +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB3_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8, lsl #7 +; CHECK-BE-NEXT: add x10, 
x9, #48 +; CHECK-BE-NEXT: add x11, x9, #32 +; CHECK-BE-NEXT: ld1 { v5.2d }, [x9] +; CHECK-BE-NEXT: ld1 { v0.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #80 +; CHECK-BE-NEXT: ld1 { v1.2d }, [x11] +; CHECK-BE-NEXT: add x11, x9, #112 +; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #96 +; CHECK-BE-NEXT: ld1 { v3.2d }, [x11] +; CHECK-BE-NEXT: uzp1 v0.4s, v1.4s, v0.4s +; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #64 +; CHECK-BE-NEXT: add x9, x9, #16 +; CHECK-BE-NEXT: ld1 { v6.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v7.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 +; CHECK-BE-NEXT: uzp1 v3.4s, v4.4s, v3.4s +; CHECK-BE-NEXT: add x8, x8, #1 +; CHECK-BE-NEXT: cmp x8, #1000 +; CHECK-BE-NEXT: uzp1 v2.4s, v6.4s, v2.4s +; CHECK-BE-NEXT: uzp1 v1.4s, v5.4s, v7.4s +; CHECK-BE-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h +; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b +; CHECK-BE-NEXT: st1 { v0.16b }, [x9] +; CHECK-BE-NEXT: b.eq .LBB3_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -315,49 +315,49 @@ exit: } define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) { -; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: LBB4_1: ; %loop -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x9, x0, x8, lsl #6 -; CHECK-NEXT: ldp q1, q0, [x9, #32] -; CHECK-NEXT: ldp q3, q2, [x9] -; CHECK-NEXT: uzp1.4s v0, v1, v0 -; CHECK-NEXT: uzp1.4s v1, v3, v2 -; CHECK-NEXT: uzp1.8h v0, v1, v0 -; CHECK-NEXT: xtn.8b v0, v0 -; CHECK-NEXT: str d0, [x1, x8, lsl #3] -; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: b.eq LBB4_1 -; CHECK-NEXT: ; %bb.2: ; %exit -; CHECK-NEXT: ret - -; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop: -; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov x8, xzr -; CHECK-BE-NEXT: .LBB4_1: // %loop -; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: 
add x9, x0, x8, lsl #6 -; CHECK-BE-NEXT: add x10, x9, #48 -; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] -; CHECK-BE-NEXT: ld1 { v0.2d }, [x10] -; CHECK-BE-NEXT: add x10, x9, #32 -; CHECK-BE-NEXT: add x9, x9, #16 -; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] -; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] -; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 -; CHECK-BE-NEXT: add x8, x8, #1 -; CHECK-BE-NEXT: cmp x8, #1000 -; CHECK-BE-NEXT: uzp1 v0.4s, v2.4s, v0.4s -; CHECK-BE-NEXT: uzp1 v1.4s, v1.4s, v3.4s -; CHECK-BE-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; CHECK-BE-NEXT: xtn v0.8b, v0.8h -; CHECK-BE-NEXT: st1 { v0.8b }, [x9] -; CHECK-BE-NEXT: b.eq .LBB4_1 -; CHECK-BE-NEXT: // %bb.2: // %exit -; CHECK-BE-NEXT: ret +; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: LBB4_1: ; %loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: add x9, x0, x8, lsl #6 +; CHECK-NEXT: ldp q1, q0, [x9, #32] +; CHECK-NEXT: ldp q3, q2, [x9] +; CHECK-NEXT: uzp1.4s v0, v1, v0 +; CHECK-NEXT: uzp1.4s v1, v3, v2 +; CHECK-NEXT: uzp1.8h v0, v1, v0 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: str d0, [x1, x8, lsl #3] +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: b.eq LBB4_1 +; CHECK-NEXT: ; %bb.2: ; %exit +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB4_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, x8, lsl #6 +; CHECK-BE-NEXT: add x10, x9, #48 +; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] +; CHECK-BE-NEXT: ld1 { v0.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #32 +; CHECK-BE-NEXT: add x9, x9, #16 +; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 +; CHECK-BE-NEXT: add x8, x8, #1 +; CHECK-BE-NEXT: cmp x8, #1000 +; CHECK-BE-NEXT: uzp1 v0.4s, v2.4s, v0.4s +; CHECK-BE-NEXT: uzp1 v1.4s, v1.4s, v3.4s +; CHECK-BE-NEXT: uzp1 
v0.8h, v1.8h, v0.8h +; CHECK-BE-NEXT: xtn v0.8b, v0.8h +; CHECK-BE-NEXT: st1 { v0.8b }, [x9] +; CHECK-BE-NEXT: b.eq .LBB4_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -378,83 +378,83 @@ exit: } define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) { -; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: LBB5_1: ; %loop -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp x10, x9, [x0] -; CHECK-NEXT: ldrb w11, [x0, #18] -; CHECK-NEXT: ldrh w13, [x0, #16] -; CHECK-NEXT: add x0, x0, #32 -; CHECK-NEXT: lsr x14, x10, #19 -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: ubfx x12, x9, #12, #20 -; CHECK-NEXT: lsr x15, x9, #31 -; CHECK-NEXT: bfi w13, w11, #16, #8 -; CHECK-NEXT: lsr x11, x9, #50 -; CHECK-NEXT: mov.s v0[1], w14 -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: lsr x12, x10, #38 -; CHECK-NEXT: bfi w11, w13, #14, #18 -; CHECK-NEXT: lsr x10, x10, #57 -; CHECK-NEXT: bfi w10, w9, #7, #25 -; CHECK-NEXT: lsr w9, w13, #5 -; CHECK-NEXT: mov.s v1[1], w15 -; CHECK-NEXT: mov.s v0[2], w12 -; CHECK-NEXT: mov.s v1[2], w11 -; CHECK-NEXT: mov.s v0[3], w10 -; CHECK-NEXT: mov.s v1[3], w9 -; CHECK-NEXT: uzp1.8h v0, v0, v1 -; CHECK-NEXT: xtn.8b v0, v0 -; CHECK-NEXT: str d0, [x1, x8, lsl #3] -; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: b.eq LBB5_1 -; CHECK-NEXT: ; %bb.2: ; %exit -; CHECK-NEXT: ret - -; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop: -; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov x8, xzr -; CHECK-BE-NEXT: .LBB5_1: // %loop -; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: ldp x10, x9, [x0] -; CHECK-BE-NEXT: ldrh w15, [x0, #16] -; CHECK-BE-NEXT: lsr x12, x10, #40 -; CHECK-BE-NEXT: lsr x13, x10, #45 -; CHECK-BE-NEXT: lsr x11, x9, #40 -; CHECK-BE-NEXT: ubfx x14, x9, #33, #7 -; CHECK-BE-NEXT: ubfx x16, x10, #26, #14 -; CHECK-BE-NEXT: bfi w16, w12, #14, #18 -; CHECK-BE-NEXT: ubfx x12, x9, #14, #18 -; 
CHECK-BE-NEXT: bfi w14, w11, #7, #24 -; CHECK-BE-NEXT: ldrb w11, [x0, #18] -; CHECK-BE-NEXT: fmov s0, w13 -; CHECK-BE-NEXT: add x0, x0, #32 -; CHECK-BE-NEXT: fmov s1, w14 -; CHECK-BE-NEXT: bfi w11, w15, #8, #16 -; CHECK-BE-NEXT: mov v0.s[1], w16 -; CHECK-BE-NEXT: mov v1.s[1], w12 -; CHECK-BE-NEXT: extr x12, x10, x9, #40 -; CHECK-BE-NEXT: lsl x9, x9, #24 -; CHECK-BE-NEXT: ubfx x10, x10, #7, #25 -; CHECK-BE-NEXT: orr w9, w11, w9 -; CHECK-BE-NEXT: lsr w9, w9, #19 -; CHECK-BE-NEXT: mov v0.s[2], w10 -; CHECK-BE-NEXT: ubfx x10, x12, #12, #20 -; CHECK-BE-NEXT: mov v1.s[2], w9 -; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 -; CHECK-BE-NEXT: add x8, x8, #1 -; CHECK-BE-NEXT: mov v0.s[3], w10 -; CHECK-BE-NEXT: cmp x8, #1000 -; CHECK-BE-NEXT: mov v1.s[3], w11 -; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-BE-NEXT: xtn v0.8b, v0.8h -; CHECK-BE-NEXT: st1 { v0.8b }, [x9] -; CHECK-BE-NEXT: b.eq .LBB5_1 -; CHECK-BE-NEXT: // %bb.2: // %exit -; CHECK-BE-NEXT: ret +; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: LBB5_1: ; %loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldp x10, x9, [x0] +; CHECK-NEXT: ldrb w11, [x0, #18] +; CHECK-NEXT: ldrh w13, [x0, #16] +; CHECK-NEXT: add x0, x0, #32 +; CHECK-NEXT: lsr x14, x10, #19 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: ubfx x12, x9, #12, #20 +; CHECK-NEXT: lsr x15, x9, #31 +; CHECK-NEXT: orr w11, w13, w11, lsl #16 +; CHECK-NEXT: lsr x13, x9, #50 +; CHECK-NEXT: mov.s v0[1], w14 +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: lsr x12, x10, #38 +; CHECK-NEXT: orr w13, w13, w11, lsl #14 +; CHECK-NEXT: lsr x10, x10, #57 +; CHECK-NEXT: orr w9, w10, w9, lsl #7 +; CHECK-NEXT: lsr w10, w11, #5 +; CHECK-NEXT: mov.s v1[1], w15 +; CHECK-NEXT: mov.s v0[2], w12 +; CHECK-NEXT: mov.s v1[2], w13 +; CHECK-NEXT: mov.s v0[3], w9 +; CHECK-NEXT: mov.s v1[3], w10 +; CHECK-NEXT: uzp1.8h v0, v0, v1 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: str d0, [x1, x8, lsl #3] +; CHECK-NEXT: add 
x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 +; CHECK-NEXT: b.eq LBB5_1 +; CHECK-NEXT: ; %bb.2: ; %exit +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov x8, xzr +; CHECK-BE-NEXT: .LBB5_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: ldp x10, x9, [x0] +; CHECK-BE-NEXT: ldrh w11, [x0, #16] +; CHECK-BE-NEXT: lsr x13, x10, #45 +; CHECK-BE-NEXT: lsr x15, x10, #40 +; CHECK-BE-NEXT: lsr x12, x9, #40 +; CHECK-BE-NEXT: ubfx x14, x9, #33, #7 +; CHECK-BE-NEXT: ubfx x16, x10, #26, #14 +; CHECK-BE-NEXT: orr w12, w14, w12, lsl #7 +; CHECK-BE-NEXT: ldrb w14, [x0, #18] +; CHECK-BE-NEXT: orr w15, w16, w15, lsl #14 +; CHECK-BE-NEXT: fmov s0, w13 +; CHECK-BE-NEXT: add x0, x0, #32 +; CHECK-BE-NEXT: fmov s1, w12 +; CHECK-BE-NEXT: ubfx x12, x9, #14, #18 +; CHECK-BE-NEXT: orr w11, w14, w11, lsl #8 +; CHECK-BE-NEXT: mov v0.s[1], w15 +; CHECK-BE-NEXT: mov v1.s[1], w12 +; CHECK-BE-NEXT: extr x12, x10, x9, #40 +; CHECK-BE-NEXT: lsl x9, x9, #24 +; CHECK-BE-NEXT: ubfx x10, x10, #7, #25 +; CHECK-BE-NEXT: orr w9, w11, w9 +; CHECK-BE-NEXT: lsr w9, w9, #19 +; CHECK-BE-NEXT: mov v0.s[2], w10 +; CHECK-BE-NEXT: ubfx x10, x12, #12, #20 +; CHECK-BE-NEXT: mov v1.s[2], w9 +; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 +; CHECK-BE-NEXT: add x8, x8, #1 +; CHECK-BE-NEXT: mov v0.s[3], w10 +; CHECK-BE-NEXT: cmp x8, #1000 +; CHECK-BE-NEXT: mov v1.s[3], w11 +; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-BE-NEXT: xtn v0.8b, v0.8h +; CHECK-BE-NEXT: st1 { v0.8b }, [x9] +; CHECK-BE-NEXT: b.eq .LBB5_1 +; CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop @@ -475,67 +475,67 @@ exit: } define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) { -; CHECK-LABEL: trunc_v11i64_to_v11i8_in_loop: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #1000 -; CHECK-NEXT: LBB6_1: ; %loop -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q1, q0, [x0, #32] -; CHECK-NEXT: add 
x9, x1, #8 -; CHECK-NEXT: add x10, x1, #10 -; CHECK-NEXT: subs x8, x8, #1 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: uzp1.4s v0, v1, v0 -; CHECK-NEXT: ldr d4, [x0, #80] -; CHECK-NEXT: ldr q1, [x0, #64] -; CHECK-NEXT: add x0, x0, #128 -; CHECK-NEXT: uzp1.4s v2, v3, v2 -; CHECK-NEXT: uzp1.4s v1, v1, v4 -; CHECK-NEXT: uzp1.8h v0, v2, v0 -; CHECK-NEXT: xtn.4h v1, v1 -; CHECK-NEXT: uzp1.16b v0, v0, v1 -; CHECK-NEXT: xtn.8b v1, v1 -; CHECK-NEXT: st1.b { v1 }[2], [x10] -; CHECK-NEXT: str d0, [x1], #16 -; CHECK-NEXT: st1.h { v0 }[4], [x9] -; CHECK-NEXT: b.eq LBB6_1 -; CHECK-NEXT: ; %bb.2: ; %exit -; CHECK-NEXT: ret - -; CHECK-BE-LABEL: trunc_v11i64_to_v11i8_in_loop: -; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov w8, #1000 -; CHECK-BE-NEXT:.LBB6_1: // %loop -; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: add x9, x0, #48 -; CHECK-BE-NEXT: add x10, x0, #32 -; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] -; CHECK-BE-NEXT: subs x8, x8, #1 -; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] -; CHECK-BE-NEXT: add x9, x0, #16 -; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] -; CHECK-BE-NEXT: add x10, x0, #64 -; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] -; CHECK-BE-NEXT: add x9, x1, #10 -; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] -; CHECK-BE-NEXT: add x10, x1, #8 -; CHECK-BE-NEXT: uzp1 v1.4s, v2.4s, v1.4s -; CHECK-BE-NEXT: ldr d2, [x0, #80] -; CHECK-BE-NEXT: add x0, x0, #128 -; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v3.4s -; CHECK-BE-NEXT: uzp1 v2.4s, v4.4s, v2.4s -; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-BE-NEXT: xtn v1.4h, v2.4s -; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-BE-NEXT: xtn v1.8b, v1.8h -; CHECK-BE-NEXT: st1 { v1.b }[2], [x9] -; CHECK-BE-NEXT: rev64 v2.16b, v0.16b -; CHECK-BE-NEXT: rev16 v0.16b, v0.16b -; CHECK-BE-NEXT: str d2, [x1], #16 -; CHECK-BE-NEXT: st1 { v0.h }[4], [x10] -; CHECK-BE-NEXT: b.eq .LBB6_1 -; CHECK-BE-NEXT:// %bb.2: // %exit -; CHECK-BE-NEXT: ret +; CHECK-LABEL: trunc_v11i64_to_v11i8_in_loop: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: mov w8, 
#1000 +; CHECK-NEXT: LBB6_1: ; %loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: add x9, x1, #8 +; CHECK-NEXT: add x10, x1, #10 +; CHECK-NEXT: subs x8, x8, #1 +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: uzp1.4s v0, v1, v0 +; CHECK-NEXT: ldr d4, [x0, #80] +; CHECK-NEXT: ldr q1, [x0, #64] +; CHECK-NEXT: add x0, x0, #128 +; CHECK-NEXT: uzp1.4s v2, v3, v2 +; CHECK-NEXT: uzp1.4s v1, v1, v4 +; CHECK-NEXT: uzp1.8h v0, v2, v0 +; CHECK-NEXT: xtn.4h v1, v1 +; CHECK-NEXT: uzp1.16b v0, v0, v1 +; CHECK-NEXT: xtn.8b v1, v1 +; CHECK-NEXT: st1.b { v1 }[2], [x10] +; CHECK-NEXT: str d0, [x1], #16 +; CHECK-NEXT: st1.h { v0 }[4], [x9] +; CHECK-NEXT: b.eq LBB6_1 +; CHECK-NEXT: ; %bb.2: ; %exit +; CHECK-NEXT: ret +; +; CHECK-BE-LABEL: trunc_v11i64_to_v11i8_in_loop: +; CHECK-BE: // %bb.0: // %entry +; CHECK-BE-NEXT: mov w8, #1000 +; CHECK-BE-NEXT: .LBB6_1: // %loop +; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, #48 +; CHECK-BE-NEXT: add x10, x0, #32 +; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] +; CHECK-BE-NEXT: subs x8, x8, #1 +; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] +; CHECK-BE-NEXT: add x9, x0, #16 +; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] +; CHECK-BE-NEXT: add x10, x0, #64 +; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #10 +; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] +; CHECK-BE-NEXT: add x10, x1, #8 +; CHECK-BE-NEXT: uzp1 v1.4s, v2.4s, v1.4s +; CHECK-BE-NEXT: ldr d2, [x0, #80] +; CHECK-BE-NEXT: add x0, x0, #128 +; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v3.4s +; CHECK-BE-NEXT: uzp1 v2.4s, v4.4s, v2.4s +; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-BE-NEXT: xtn v1.4h, v2.4s +; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-BE-NEXT: xtn v1.8b, v1.8h +; CHECK-BE-NEXT: st1 { v1.b }[2], [x9] +; CHECK-BE-NEXT: rev64 v2.16b, v0.16b +; CHECK-BE-NEXT: rev16 v0.16b, v0.16b +; CHECK-BE-NEXT: str d2, [x1], #16 +; CHECK-BE-NEXT: st1 { v0.h }[4], [x10] +; CHECK-BE-NEXT: b.eq .LBB6_1 +; 
CHECK-BE-NEXT: // %bb.2: // %exit +; CHECK-BE-NEXT: ret entry: br label %loop diff --git a/llvm/test/CodeGen/AArch64/urem-seteq.ll b/llvm/test/CodeGen/AArch64/urem-seteq.ll index 9f9e3f712a624..56b030dcca52a 100644 --- a/llvm/test/CodeGen/AArch64/urem-seteq.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq.ll @@ -82,8 +82,8 @@ define i16 @test_urem_even(i16 %X) nounwind { ; CHECK-NEXT: mul w8, w0, w8 ; CHECK-NEXT: and w9, w8, #0xfffc ; CHECK-NEXT: lsr w9, w9, #1 -; CHECK-NEXT: bfi w9, w8, #15, #17 -; CHECK-NEXT: ubfx w8, w9, #1, #15 +; CHECK-NEXT: orr w8, w9, w8, lsl #15 +; CHECK-NEXT: ubfx w8, w8, #1, #15 ; CHECK-NEXT: cmp w8, #2340 ; CHECK-NEXT: cset w0, hi ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll index eab5c8abd0204..9e73cc5195e4a 100644 --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -249,17 +249,18 @@ define <4 x i32> @uaddo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: umov w8, v0.h[0] +; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[2] ; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and v1.8b, v0.8b, v2.8b ; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: bfi w9, w10, #2, #1 +; CHECK-NEXT: and w8, w8, #0x1 +; CHECK-NEXT: bfi w8, w9, #1, #1 ; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: bfi w9, w11, #3, #29 -; CHECK-NEXT: and w8, w9, #0xf +; CHECK-NEXT: bfi w8, w10, #2, #1 +; CHECK-NEXT: orr w8, w8, w11, lsl #3 +; CHECK-NEXT: and w8, w8, #0xf ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll index 0fccb574644f1..4b61a873706ad 100644 --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ 
b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -299,14 +299,15 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind ; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: and v1.8b, v2.8b, v1.8b -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w8, v1.h[0] +; CHECK-NEXT: umov w9, v1.h[1] ; CHECK-NEXT: umov w10, v1.h[2] ; CHECK-NEXT: umov w11, v1.h[3] -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: bfi w9, w11, #3, #29 -; CHECK-NEXT: and w8, w9, #0xf +; CHECK-NEXT: and w8, w8, #0x1 +; CHECK-NEXT: bfi w8, w9, #1, #1 +; CHECK-NEXT: bfi w8, w10, #2, #1 +; CHECK-NEXT: orr w8, w8, w11, lsl #3 +; CHECK-NEXT: and w8, w8, #0xf ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir index 87949ebef75a0..01e4162f0d503 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-build-vector.mir @@ -252,3 +252,21 @@ body: | %7:_(<5 x s32>) = G_BUILD_VECTOR %3, %4, %5, %6, %2 $vgpr5_vgpr6_vgpr7_vgpr8_vgpr9= COPY %7 ... + +--- +name: value_finder_look_through_copy +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX9-LABEL: name: value_finder_look_through_copy + ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: $vgpr2_vgpr3 = COPY [[COPY]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0(<2 x s32>) + %3:_(s32) = COPY %1 + %4:_(<2 x s32>) = G_BUILD_VECTOR %3, %2 + $vgpr2_vgpr3= COPY %4 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll index ec96e2f26d675..cc459814e62ca 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll @@ -8,9 +8,7 @@ define amdgpu_vs float @test_f16_f32_add_fma_ext_mul(float %x, float %y, float %z, half %u, half %v) { ; GFX9-DENORM-LABEL: test_f16_f32_add_fma_ext_mul: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX9-DENORM-NEXT: v_mad_f32 v2, v3, v4, v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, v4, v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1 ; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-DENORM-NEXT: ; return to shader part epilog @@ -18,25 +16,22 @@ define amdgpu_vs float @test_f16_f32_add_fma_ext_mul(float %x, float %y, float % ; GFX10-LABEL: test_f16_f32_add_fma_ext_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-NEXT: v_fmac_f32_e32 v3, v0, v1 -; GFX10-NEXT: v_add_f32_e32 v0, v3, v2 +; GFX10-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX10-CONTRACT-LABEL: test_f16_f32_add_fma_ext_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v0, v1 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v3, v2 +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-CONTRACT-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_f32_add_fma_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; 
GFX10-DENORM-NEXT: v_fmac_f32_e32 v3, v0, v1 -; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v3, v2 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v1, v3 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul half %u, %v @@ -50,12 +45,8 @@ define amdgpu_vs float @test_f16_f32_add_fma_ext_mul(float %x, float %y, float % define amdgpu_vs float @test_f16_f32_add_ext_fma_mul(half %x, half %y, float %z, half %u, half %v) { ; GFX9-DENORM-LABEL: test_f16_f32_add_ext_fma_mul: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v5, v0 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v3 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v4 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v3, v2 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v5, v1 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v3, v4, v2 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_f16_f32_add_ext_fma_mul: @@ -94,34 +85,29 @@ define amdgpu_vs float @test_f16_f32_add_ext_fma_mul(half %x, half %y, float %z, define amdgpu_vs float @test_f16_f32_add_fma_ext_mul_rhs(float %x, float %y, float %z, half %u, half %v) { ; GFX9-DENORM-LABEL: test_f16_f32_add_fma_ext_mul_rhs: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v3, v4 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v1, v2 ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_f16_f32_add_fma_ext_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-NEXT: v_fmac_f32_e32 v3, v1, v2 -; GFX10-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX10-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1] +; 
GFX10-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX10-CONTRACT-LABEL: test_f16_f32_add_fma_ext_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v3, v1, v2 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-CONTRACT-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_f32_add_fma_ext_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: v_mul_f16_e32 v3, v3, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v3, v1, v2 -; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, v1, v2, v3 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul half %u, %v @@ -135,12 +121,8 @@ define amdgpu_vs float @test_f16_f32_add_fma_ext_mul_rhs(float %x, float %y, flo define amdgpu_vs float @test_f16_f32_add_ext_fma_mul_rhs(float %x, half %y, half %z, half %u, half %v) { ; GFX9-DENORM-LABEL: test_f16_f32_add_ext_fma_mul_rhs: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v3, v4 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v0, v1, v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v3, v4, v0 op_sel_hi:[1,1,0] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v1, v2, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_f16_f32_add_ext_fma_mul_rhs: @@ -181,72 +163,56 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_fma_ext_mul(<4 x float> %x, < ; GFX9-DENORM: ; %bb.0: ; %.entry ; GFX9-DENORM-NEXT: 
v_pk_mul_f16 v12, v12, v14 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v14, v0, v4 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v12, v1, v5 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v15, v2, v6 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v13, v3, v7 -; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v14, v8 -; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v12, v9 -; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v15, v10 -; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v13, v11 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v11 ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_v4f16_v4f32_add_fma_ext_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_fmac_f32_e32 v14, v0, v4 -; GFX10-NEXT: v_fmac_f32_e32 v12, v1, v5 -; GFX10-NEXT: v_fmac_f32_e32 v15, v2, v6 -; GFX10-NEXT: v_fmac_f32_e32 v13, v3, v7 -; GFX10-NEXT: v_add_f32_e32 v0, v14, v8 -; GFX10-NEXT: v_add_f32_e32 v1, v12, v9 -; GFX10-NEXT: 
v_add_f32_e32 v2, v15, v10 -; GFX10-NEXT: v_add_f32_e32 v3, v13, v11 +; GFX10-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX10-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX10-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX10-NEXT: v_add_f32_e32 v3, v3, v11 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX10-CONTRACT-LABEL: test_v4f16_v4f32_add_fma_ext_mul: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v14, v0, v4 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v12, v1, v5 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v15, v2, v6 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v13, v3, v7 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v14, v8 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v12, v9 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v15, v10 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v13, v11 +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v11 ; 
GFX10-CONTRACT-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_v4f32_add_fma_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v14, v0, v4 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v12, v1, v5 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v15, v2, v6 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v13, v3, v7 -; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v14, v8 -; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v12, v9 -; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v15, v10 -; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v13, v11 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v4, v12 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, v1, v5, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v2, v6, v13 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v3, v7, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v8 +; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v9 +; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v10 +; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v11 ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul <4 x half> %u, %v @@ -339,72 +305,56 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_fma_ext_mul_rhs(<4 x float> % ; GFX9-DENORM: ; %bb.0: ; %.entry ; GFX9-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v14, v4, v8 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v12, v5, v9 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v15, v6, v10 -; GFX9-DENORM-NEXT: v_mac_f32_e32 v13, v7, v11 -; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v14 -; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v12 -; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v15 -; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v13 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_mad_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX9-DENORM-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX9-DENORM-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX9-DENORM-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX9-DENORM-NEXT: v_add_f32_e32 v3, v3, v7 ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_fmac_f32_e32 v14, v4, v8 -; GFX10-NEXT: v_fmac_f32_e32 v12, v5, v9 -; GFX10-NEXT: v_fmac_f32_e32 v15, v6, v10 -; GFX10-NEXT: v_fmac_f32_e32 v13, v7, v11 -; GFX10-NEXT: v_add_f32_e32 v0, v0, v14 -; GFX10-NEXT: v_add_f32_e32 v1, v1, v12 -; GFX10-NEXT: v_add_f32_e32 v2, v2, v15 -; GFX10-NEXT: v_add_f32_e32 v3, v3, v13 +; GFX10-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1] +; GFX10-NEXT: v_fma_mix_f32 v7, v7, v11, v13 
op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX10-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX10-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX10-NEXT: v_add_f32_e32 v3, v3, v7 ; GFX10-NEXT: ; return to shader part epilog ; ; GFX10-CONTRACT-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs: ; GFX10-CONTRACT: ; %bb.0: ; %.entry ; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-CONTRACT-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-CONTRACT-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v14, v4, v8 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v12, v5, v9 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v15, v6, v10 -; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v13, v7, v11 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v14 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v12 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v15 -; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v13 +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_fma_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX10-CONTRACT-NEXT: v_add_f32_e32 v3, v3, v7 ; GFX10-CONTRACT-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_v4f16_v4f32_add_fma_ext_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry ; GFX10-DENORM-NEXT: v_pk_mul_f16 v12, v12, v14 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v13, v13, v15 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v14, v12 -; GFX10-DENORM-NEXT: 
v_cvt_f32_f16_sdwa v12, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v15, v13 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v13, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v14, v4, v8 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v12, v5, v9 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v15, v6, v10 -; GFX10-DENORM-NEXT: v_fmac_f32_e32 v13, v7, v11 -; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v14 -; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v12 -; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v15 -; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v13 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v4, v4, v8, v12 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v5, v9, v12 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v6, v6, v10, v13 op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v7, v7, v11, v13 op_sel:[0,0,1] op_sel_hi:[0,0,1] +; GFX10-DENORM-NEXT: v_add_f32_e32 v0, v0, v4 +; GFX10-DENORM-NEXT: v_add_f32_e32 v1, v1, v5 +; GFX10-DENORM-NEXT: v_add_f32_e32 v2, v2, v6 +; GFX10-DENORM-NEXT: v_add_f32_e32 v3, v3, v7 ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul <4 x half> %u, %v diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll index 60e471d30413a..f3e5615783639 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-mul.ll @@ -8,16 +8,15 @@ define amdgpu_vs float @test_f16_f32_add_ext_mul(half inreg %x, half inreg %y, float inreg %z) { ; GFX9-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul: ; GFX9-FAST-DENORM: ; %bb.0: ; %.entry -; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX9-FAST-DENORM-NEXT: v_mad_f32 v0, v0, v1, s2 +; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s2 +; 
GFX9-FAST-DENORM-NEXT: v_mad_mix_f32 v0, s0, v0, v1 op_sel_hi:[1,1,0] ; GFX9-FAST-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul: ; GFX10-FAST-DENORM: ; %bb.0: ; %.entry -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v1, s2 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s1, v0 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast half %x, %y @@ -29,16 +28,15 @@ define amdgpu_vs float @test_f16_f32_add_ext_mul(half inreg %x, half inreg %y, f define amdgpu_vs float @test_f16_f32_add_ext_mul_rhs(half inreg %x, half inreg %y, float inreg %z) { ; GFX9-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul_rhs: ; GFX9-FAST-DENORM: ; %bb.0: ; %.entry -; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX9-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX9-FAST-DENORM-NEXT: v_mad_f32 v0, v0, v1, s2 +; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-FAST-DENORM-NEXT: v_mad_mix_f32 v0, s0, v0, v1 op_sel_hi:[1,1,0] ; GFX9-FAST-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-FAST-DENORM-LABEL: test_f16_f32_add_ext_mul_rhs: ; GFX10-FAST-DENORM: ; %bb.0: ; %.entry -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s1 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v1, s2 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s1, v0 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast half %x, %y @@ -70,25 +68,16 @@ define amdgpu_vs <5 x float> @test_5xf16_5xf32_add_ext_mul(<5 x half> inreg %x, ; ; GFX10-FAST-DENORM-LABEL: test_5xf16_5xf32_add_ext_mul: ; GFX10-FAST-DENORM: ; %bb.0: ; %.entry -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s11, s0, 16 -; 
GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s1, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s3, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s4, 16 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s11 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v2, s1 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v3, s12 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v4, s2 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v5, s3 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v6, s13 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v7, s4 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v8, s14 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v9, s5 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v5, s6 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v1, v1, v6, s7 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v2, v2, v7, s8 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v3, v3, v8, s9 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v4, v4, v9, s10 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v2, s8 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v3, s9 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v4, s10 +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s3, v0 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s0, s3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v2, s1, s4, v2 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s1, s4, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v4, s2, s5, v4 op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast <5 x half> %x, %y @@ -122,30 +111,18 @@ define amdgpu_vs <6 x float> @test_6xf16_6xf32_add_ext_mul_rhs(<6 x half> inreg ; ; GFX10-FAST-DENORM-LABEL: test_6xf16_6xf32_add_ext_mul_rhs: ; GFX10-FAST-DENORM: ; %bb.0: ; %.entry -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s12, s0, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s13, s1, 16 -; 
GFX10-FAST-DENORM-NEXT: s_lshr_b32 s14, s2, 16 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v0, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v2, s1 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v4, s2 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s0, s3, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s1, s4, 16 -; GFX10-FAST-DENORM-NEXT: s_lshr_b32 s2, s5, 16 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v1, s12 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v3, s13 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v5, s14 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v6, s3 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v7, s0 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v8, s4 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v9, s1 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v10, s5 -; GFX10-FAST-DENORM-NEXT: v_cvt_f32_f16_e32 v11, s2 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v0, v0, v6, s6 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v1, v1, v7, s7 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v2, v2, v8, s8 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v3, v3, v9, s9 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v4, v4, v10, s10 -; GFX10-FAST-DENORM-NEXT: v_fma_f32 v5, v5, v11, s11 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v2, s8 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v3, s9 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v4, s10 +; GFX10-FAST-DENORM-NEXT: v_mov_b32_e32 v5, s11 +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v0, s0, s3, v0 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v1, s0, s3, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v2, s1, s4, v2 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v3, s1, s4, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v4, s2, s5, v4 op_sel_hi:[1,1,0] +; GFX10-FAST-DENORM-NEXT: v_fma_mix_f32 v5, s2, s5, v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; GFX10-FAST-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast 
<6 x half> %x, %y diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll index d846ca98f9419..d225626ff62bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-mul.ll @@ -6,16 +6,12 @@ define amdgpu_vs float @test_f16_to_f32_sub_ext_mul(half %x, half %y, float %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_mul: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %x, %y @@ -28,16 +24,12 @@ entry: define amdgpu_vs float @test_f16_to_f32_sub_ext_mul_rhs(float %x, half %y, half %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_mul_rhs: ; GFX9-DENORM: ; %bb.0: ; %.entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul 
fast half %y, %z @@ -64,18 +56,12 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul(<4 x half> %x, <4 ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4 -; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5 -; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6 -; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v4, v0, v2, -v4 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v3, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v4 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %x, %y @@ -102,18 +88,10 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_mul_rhs(<4 x float> %x ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_mul_rhs: ; GFX10-DENORM: ; %bb.0: ; %.entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: 
v_cvt_f32_f16_e32 v10, v6 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0 -; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1 -; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2 -; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v6, v0 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v7, v2 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog .entry: %a = fmul fast <4 x half> %y, %z diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll index 84002c0e3f22b..920f099164349 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-ext-neg-mul.ll @@ -6,16 +6,12 @@ define amdgpu_vs float @test_f16_to_f32_sub_ext_neg_mul(half %x, half %y, float %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1 -; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to 
shader part epilog entry: %a = fmul fast half %x, %y @@ -29,16 +25,12 @@ entry: define amdgpu_vs float @test_f16_to_f32_sub_neg_ext_mul(half %x, half %y, float %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1 -; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v1, -v1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, -v2 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %x, %y @@ -53,16 +45,12 @@ entry: define amdgpu_vs float @test_f16_to_f32_sub_ext_neg_mul2(float %x, half %y, half %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul2: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2 -; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_ext_neg_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %y, %z @@ -76,16 +64,12 @@ entry: define amdgpu_vs float @test_f16_to_f32_sub_neg_ext_mul2(float %x, half %y, half %z) { ; GFX9-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul2: ; GFX9-DENORM: ; %bb.0: ; %entry -; GFX9-DENORM-NEXT: 
v_cvt_f32_f16_e32 v1, v1 -; GFX9-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2 -; GFX9-DENORM-NEXT: v_mad_f32 v0, -v1, v2, v0 +; GFX9-DENORM-NEXT: v_mad_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0] ; GFX9-DENORM-NEXT: ; return to shader part epilog ; ; GFX10-DENORM-LABEL: test_f16_to_f32_sub_neg_ext_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v1, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e64 v2, -v2 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v1, v2, v0 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v1, -v2, v0 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast half %y, %z @@ -113,20 +97,13 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_ext_neg_mul(<4 x half> %x, ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4 -; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5 -; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6 -; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v2 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, -v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, -v3, -v7 op_sel:[1,1,0] 
op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v8, -v4 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v9, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %x, %y @@ -154,20 +131,13 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul(<4 x half> %x, ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v3, 0x80008000, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v0 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v11, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v0, v2 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v2, v3 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, v8, v0, -v4 -; GFX10-DENORM-NEXT: v_fma_f32 v1, v9, v1, -v5 -; GFX10-DENORM-NEXT: v_fma_f32 v2, v10, v2, -v6 -; GFX10-DENORM-NEXT: v_fma_f32 v3, v11, v3, -v7 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v2 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v3 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v5, v0, -v2, -v5 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, v1, -v3, -v7 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, v0, v8, -v4 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, v1, v9, -v6 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_mov_b32_e32 v1, v5 ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %x, %y @@ -196,20 +166,12 @@ define amdgpu_vs <4 x float> 
@test_v4f16_to_v4f32_sub_ext_neg_mul2(<4 x float> % ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_ext_neg_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v6, 0x80008000, v6 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v7, 0x80008000, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v6 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0 -; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1 -; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2 -; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v6 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v7 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, -v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, -v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v8, v0 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v9, v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %y, %z @@ -237,20 +199,12 @@ define amdgpu_vs <4 x float> @test_v4f16_to_v4f32_sub_neg_ext_mul2(<4 x float> % ; ; GFX10-DENORM-LABEL: test_v4f16_to_v4f32_sub_neg_ext_mul2: ; GFX10-DENORM: ; %bb.0: ; %entry -; GFX10-DENORM-NEXT: v_xor_b32_e32 v6, 0x80008000, v6 -; GFX10-DENORM-NEXT: v_xor_b32_e32 v7, 0x80008000, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v8, v4 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v9, v5 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v10, v6 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_e32 v11, v7 -; GFX10-DENORM-NEXT: v_cvt_f32_f16_sdwa v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-DENORM-NEXT: v_fma_f32 v0, -v8, v10, v0 -; GFX10-DENORM-NEXT: v_fma_f32 v1, -v4, v6, v1 -; GFX10-DENORM-NEXT: v_fma_f32 v2, -v9, v11, v2 -; GFX10-DENORM-NEXT: v_fma_f32 v3, -v5, v7, v3 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v8, 0x80008000, v6 +; GFX10-DENORM-NEXT: v_xor_b32_e32 v9, 0x80008000, v7 +; GFX10-DENORM-NEXT: v_fma_mix_f32 v1, -v4, -v6, v1 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v3, -v5, -v7, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v0, -v4, v8, v0 op_sel_hi:[1,1,0] +; GFX10-DENORM-NEXT: v_fma_mix_f32 v2, -v5, v9, v2 op_sel_hi:[1,1,0] ; GFX10-DENORM-NEXT: ; return to shader part epilog entry: %a = fmul fast <4 x half> %y, %z diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir new file mode 100644 index 0000000000000..2bce205735299 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsub-fneg.mir @@ -0,0 +1,387 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: test_f16_poszero_nsz +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f16_poszero_nsz + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG %input + ; 
CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: %res:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %0:_(s32) = COPY $vgpr0 + %input:_(s16) = G_TRUNC %0 + %cst:_(s16) = G_FCONSTANT half 0.0 + %sub:_(s16) = nsz G_FSUB %cst, %input + %res:_(s32) = G_ANYEXT %sub + $vgpr0 = COPY %res +... + +--- +name: test_f16_poszero_nonsz_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f16_poszero_nonsz_nofold + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: %cst:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: %sub:_(s16) = G_FSUB %cst, %input + ; CHECK-NEXT: %res:_(s32) = G_ANYEXT %sub(s16) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %0:_(s32) = COPY $vgpr0 + %input:_(s16) = G_TRUNC %0 + %cst:_(s16) = G_FCONSTANT half 0.0 + %sub:_(s16) = G_FSUB %cst, %input + %res:_(s32) = G_ANYEXT %sub + $vgpr0 = COPY %res +... + +--- +name: test_f16_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f16_negzero + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %input:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: %res:_(s32) = G_ANYEXT [[FCANONICALIZE]](s16) + ; CHECK-NEXT: $vgpr0 = COPY %res(s32) + %0:_(s32) = COPY $vgpr0 + %input:_(s16) = G_TRUNC %0 + %cst:_(s16) = G_FCONSTANT half -0.0 + %sub:_(s16) = G_FSUB %cst, %input + %res:_(s32) = G_ANYEXT %sub + $vgpr0 = COPY %res +... 
+ +--- +name: test_f32_poszero_nsz +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f32_poszero_nsz + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) + %input:_(s32) = COPY $vgpr0 + %cst:_(s32) = G_FCONSTANT float 0.0 + %sub:_(s32) = nsz G_FSUB %cst, %input + $vgpr0 = COPY %sub +... + +--- +name: test_f32_poszero_nonsz_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f32_poszero_nonsz_nofold + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: %sub:_(s32) = G_FSUB %cst, %input + ; CHECK-NEXT: $vgpr0 = COPY %sub(s32) + %input:_(s32) = COPY $vgpr0 + %cst:_(s32) = G_FCONSTANT float 0.0 + %sub:_(s32) = G_FSUB %cst, %input + $vgpr0 = COPY %sub +... + +--- +name: test_f32_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_f32_negzero + ; CHECK: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0 = COPY [[FCANONICALIZE]](s32) + %input:_(s32) = COPY $vgpr0 + %cst:_(s32) = G_FCONSTANT float -0.0 + %sub:_(s32) = G_FSUB %cst, %input + $vgpr0 = COPY %sub +... 
+ +--- +name: test_f64_poszero_nsz +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_f64_poszero_nsz + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](s64) + %input:_(s64) = COPY $vgpr0_vgpr1 + %cst:_(s64) = G_FCONSTANT double 0.0 + %sub:_(s64) = nsz G_FSUB %cst, %input + $vgpr0_vgpr1 = COPY %sub +... + +--- +name: test_f64_poszero_nonsz_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_f64_poszero_nonsz_nofold + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %cst:_(s64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: %sub:_(s64) = G_FSUB %cst, %input + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %sub(s64) + %input:_(s64) = COPY $vgpr0_vgpr1 + %cst:_(s64) = G_FCONSTANT double 0.0 + %sub:_(s64) = G_FSUB %cst, %input + $vgpr0_vgpr1 = COPY %sub +... + +--- +name: test_f64_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_f64_negzero + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](s64) + %input:_(s64) = COPY $vgpr0_vgpr1 + %cst:_(s64) = G_FCONSTANT double -0.0 + %sub:_(s64) = G_FSUB %cst, %input + $vgpr0_vgpr1 = COPY %sub +... 
+ +--- +name: test_v4f16_poszero_nsz +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_v4f16_poszero_nsz + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s16>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s16>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](<4 x s16>) + %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %cst:_(s16) = G_FCONSTANT half 0.0 + %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s16>) = nsz G_FSUB %veccst, %input + $vgpr0_vgpr1 = COPY %sub +... + +--- +name: test_v4f16_poszero_nonsz_nofold +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_v4f16_poszero_nonsz_nofold + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: %cst:_(s16) = G_FCONSTANT half 0xH0000 + ; CHECK-NEXT: %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst(s16), %cst(s16), %cst(s16), %cst(s16) + ; CHECK-NEXT: %sub:_(<4 x s16>) = G_FSUB %veccst, %input + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %sub(<4 x s16>) + %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %cst:_(s16) = G_FCONSTANT half 0.0 + %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s16>) = G_FSUB %veccst, %input + $vgpr0_vgpr1 = COPY %sub +... 
+ +--- +name: test_v4f16_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_v4f16_negzero + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s16>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s16>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[FCANONICALIZE]](<4 x s16>) + %input:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %cst:_(s16) = G_FCONSTANT half -0.0 + %veccst:_(<4 x s16>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s16>) = G_FSUB %veccst, %input + $vgpr0_vgpr1 = COPY %sub +... + +--- +name: test_v4f32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v4f32 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) + %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s32) = G_FCONSTANT float 0.0 + %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s32>) = nsz G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... 
+ +--- +name: test_v4f32_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v4f32_negzero + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) + %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s32) = G_FCONSTANT float -0.0 + %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %cst, %cst, %cst + %sub:_(<4 x s32>) = G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... + +--- +name: test_v4f32_negzero_undef_elt +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v4f32_negzero_undef_elt + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) + %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s32) = G_FCONSTANT float -0.0 + %undef:_(s32) = G_IMPLICIT_DEF + %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %undef, %cst, %cst + %sub:_(<4 x s32>) = G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... 
+ +--- +name: test_v4f32_poszero_undef_elt +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v4f32_poszero_undef_elt + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<4 x s32>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<4 x s32>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<4 x s32>) + %input:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s32) = G_FCONSTANT float 0.0 + %undef:_(s32) = G_IMPLICIT_DEF + %veccst:_(<4 x s32>) = G_BUILD_VECTOR %cst, %undef, %cst, %cst + %sub:_(<4 x s32>) = nsz G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... + +--- +name: test_v2f64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v2f64 + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s64>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<2 x s64>) + %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s64) = G_FCONSTANT double 0.0 + %veccst:_(<2 x s64>) = G_BUILD_VECTOR %cst, %cst + %sub:_(<2 x s64>) = nsz G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... 
+ +--- +name: test_v2f64_negzero +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_v2f64_negzero + ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG %input + ; CHECK-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s64>) = G_FCANONICALIZE [[FNEG]] + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FCANONICALIZE]](<2 x s64>) + %input:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %cst:_(s64) = G_FCONSTANT double -0.0 + %veccst:_(<2 x s64>) = G_BUILD_VECTOR %cst, %cst + %sub:_(<2 x s64>) = G_FSUB %veccst, %input + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sub +... + diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll index 5194410266813..093b40114d5ff 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -968,12 +968,8 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-NEXT: v_mov_b32_e32 v1, v5 -; GFX9-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-NEXT: v_mov_b32_e32 v3, v7 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: extractelement_vgpr_v4i128_idx1: @@ -981,12 +977,8 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[0:1] +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; 
GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v4 -; GFX8-NEXT: v_mov_b32_e32 v1, v5 -; GFX8-NEXT: v_mov_b32_e32 v2, v6 -; GFX8-NEXT: v_mov_b32_e32 v3, v7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: extractelement_vgpr_v4i128_idx1: @@ -995,34 +987,24 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 -; GFX7-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[4:7], 0 addr64 offset:16 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:16 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v4 -; GFX7-NEXT: v_mov_b32_e32 v1, v5 -; GFX7-NEXT: v_mov_b32_e32 v2, v6 -; GFX7-NEXT: v_mov_b32_e32 v3, v7 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, v4 -; GFX10-NEXT: v_mov_b32_e32 v1, v5 -; GFX10-NEXT: v_mov_b32_e32 v2, v6 -; GFX10-NEXT: v_mov_b32_e32 v3, v7 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 -; GFX11-NEXT: v_dual_mov_b32 v2, v6 :: v_dual_mov_b32 v3, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr %element = extractelement <4 x i128> %vector, i32 1 @@ -1033,12 +1015,8 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) 
{ ; GFX9-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v8 -; GFX9-NEXT: v_mov_b32_e32 v1, v9 -; GFX9-NEXT: v_mov_b32_e32 v2, v10 -; GFX9-NEXT: v_mov_b32_e32 v3, v11 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: extractelement_vgpr_v4i128_idx2: @@ -1046,12 +1024,8 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 32, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: flat_load_dwordx4 v[8:11], v[0:1] +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v8 -; GFX8-NEXT: v_mov_b32_e32 v1, v9 -; GFX8-NEXT: v_mov_b32_e32 v2, v10 -; GFX8-NEXT: v_mov_b32_e32 v3, v11 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: extractelement_vgpr_v4i128_idx2: @@ -1060,34 +1034,24 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 -; GFX7-NEXT: buffer_load_dwordx4 v[8:11], v[0:1], s[4:7], 0 addr64 offset:32 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:32 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v8 -; GFX7-NEXT: v_mov_b32_e32 v1, v9 -; GFX7-NEXT: v_mov_b32_e32 v2, v10 -; GFX7-NEXT: v_mov_b32_e32 v3, v11 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:32 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; 
GFX10-NEXT: v_mov_b32_e32 v0, v8 -; GFX10-NEXT: v_mov_b32_e32 v1, v9 -; GFX10-NEXT: v_mov_b32_e32 v2, v10 -; GFX10-NEXT: v_mov_b32_e32 v3, v11 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: global_load_b128 v[8:11], v[0:1], off offset:32 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 -; GFX11-NEXT: v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr %element = extractelement <4 x i128> %vector, i32 2 @@ -1098,12 +1062,8 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v12 -; GFX9-NEXT: v_mov_b32_e32 v1, v13 -; GFX9-NEXT: v_mov_b32_e32 v2, v14 -; GFX9-NEXT: v_mov_b32_e32 v3, v15 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: extractelement_vgpr_v4i128_idx3: @@ -1111,12 +1071,8 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 48, v0 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: flat_load_dwordx4 v[12:15], v[0:1] +; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v12 -; GFX8-NEXT: v_mov_b32_e32 v1, v13 -; GFX8-NEXT: v_mov_b32_e32 v2, v14 -; GFX8-NEXT: v_mov_b32_e32 v3, v15 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: extractelement_vgpr_v4i128_idx3: @@ -1125,34 +1081,24 @@ 
define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 -; GFX7-NEXT: buffer_load_dwordx4 v[12:15], v[0:1], s[4:7], 0 addr64 offset:48 +; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 offset:48 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v12 -; GFX7-NEXT: v_mov_b32_e32 v1, v13 -; GFX7-NEXT: v_mov_b32_e32 v2, v14 -; GFX7-NEXT: v_mov_b32_e32 v3, v15 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:48 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, v12 -; GFX10-NEXT: v_mov_b32_e32 v1, v13 -; GFX10-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-NEXT: v_mov_b32_e32 v3, v15 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: global_load_b128 v[12:15], v[0:1], off offset:48 +; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v0, v12 :: v_dual_mov_b32 v1, v13 -; GFX11-NEXT: v_dual_mov_b32 v2, v14 :: v_dual_mov_b32 v3, v15 ; GFX11-NEXT: s_setpc_b64 s[30:31] %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr %element = extractelement <4 x i128> %vector, i32 3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll index 2e9a66c579cbe..92961ab1c4dda 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll @@ -23,7 +23,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; 
SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 +; SI-NEXT: v_mul_f32_e32 v2, -1.0, v2 ; SI-NEXT: v_med3_f32 v2, v2, v3, v4 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -56,7 +56,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_f32_e32 v4, 0x80000000, v7 +; VI-NEXT: v_mul_f32_e32 v4, -1.0, v7 ; VI-NEXT: v_med3_f32 v2, v4, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -72,7 +72,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm @@ -88,7 +88,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX10-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm @@ -104,7 +104,7 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX11-NEXT: global_store_b32 
v0, v1, s[0:1] @@ -145,7 +145,7 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 +; SI-NEXT: v_mul_f32_e32 v2, -1.0, v2 ; SI-NEXT: v_mul_f32_e32 v3, 1.0, v3 ; SI-NEXT: v_min_f32_e32 v5, v2, v3 ; SI-NEXT: v_max_f32_e32 v2, v2, v3 @@ -183,7 +183,7 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_f32_e32 v4, 0x80000000, v7 +; VI-NEXT: v_mul_f32_e32 v4, -1.0, v7 ; VI-NEXT: v_mul_f32_e32 v2, 1.0, v2 ; VI-NEXT: v_min_f32_e32 v5, v4, v2 ; VI-NEXT: v_max_f32_e32 v2, v4, v2 @@ -204,7 +204,7 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 ; GFX9-NEXT: v_min_f32_e32 v4, v1, v2 ; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 @@ -225,7 +225,7 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 +; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 ; GFX10-NEXT: v_max_f32_e32 v2, v2, v2 ; GFX10-NEXT: v_max_f32_e32 v3, v3, v3 ; GFX10-NEXT: v_max_f32_e32 v4, v1, v2 @@ -246,7 +246,8 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_sub_f32 v1, 0x80000000, v1 :: v_dual_max_f32 v2, v2, v2 +; 
GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX11-NEXT: v_max_f32_e32 v2, v2, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_min_f32_e32 v4, v1, v2 ; GFX11-NEXT: v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3 @@ -289,9 +290,8 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: s_mov_b32 s2, 0x80000000 -; SI-NEXT: v_sub_f32_e32 v2, 0x80000000, v2 -; SI-NEXT: v_sub_f32_e64 v4, s2, |v4| +; SI-NEXT: v_mul_f32_e32 v2, -1.0, v2 +; SI-NEXT: v_mul_f32_e64 v4, -1.0, |v4| ; SI-NEXT: v_med3_f32 v2, v2, |v3|, v4 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -320,13 +320,12 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: flat_load_dword v3, v[4:5] glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s2, 0x80000000 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_f32_e32 v4, 0x80000000, v7 -; VI-NEXT: v_sub_f32_e64 v3, s2, |v3| +; VI-NEXT: v_mul_f32_e32 v4, -1.0, v7 +; VI-NEXT: v_mul_f32_e64 v3, -1.0, |v3| ; VI-NEXT: v_med3_f32 v2, v4, |v2|, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -342,9 +341,8 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s2, 0x80000000 -; GFX9-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 -; GFX9-NEXT: v_sub_f32_e64 v3, s2, |v3| +; GFX9-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX9-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX9-NEXT: v_med3_f32 v1, v1, |v2|, v3 ; GFX9-NEXT: global_store_dword 
v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm @@ -360,8 +358,8 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 -; GFX10-NEXT: v_sub_f32_e64 v3, 0x80000000, |v3| +; GFX10-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX10-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX10-NEXT: v_med3_f32 v1, v1, |v2|, v3 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm @@ -377,8 +375,8 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_sub_f32_e32 v1, 0x80000000, v1 -; GFX11-NEXT: v_sub_f32_e64 v3, 0x80000000, |v3| +; GFX11-NEXT: v_max_f32_e64 v1, -v1, -v1 +; GFX11-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_med3_f32 v1, v1, |v2|, v3 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] @@ -425,10 +423,9 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; SI-NEXT: s_mov_b64 s[8:9], s[6:7] ; SI-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: s_mov_b32 s2, 0x80000000 -; SI-NEXT: v_sub_f32_e64 v2, s2, |v2| -; SI-NEXT: v_sub_f32_e64 v3, s2, |v3| -; SI-NEXT: v_sub_f32_e64 v4, s2, |v4| +; SI-NEXT: v_mul_f32_e64 v2, -1.0, |v2| +; SI-NEXT: v_mul_f32_e64 v3, -1.0, |v3| +; SI-NEXT: v_mul_f32_e64 v4, -1.0, |v4| ; SI-NEXT: v_med3_f32 v2, v2, v3, v4 ; SI-NEXT: s_mov_b64 s[2:3], s[10:11] ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 @@ -457,14 +454,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: flat_load_dword v3, v[4:5] glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s2, 0x80000000 ; VI-NEXT: 
v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_sub_f32_e64 v4, s2, |v7| -; VI-NEXT: v_sub_f32_e64 v2, s2, |v2| -; VI-NEXT: v_sub_f32_e64 v3, s2, |v3| +; VI-NEXT: v_mul_f32_e64 v4, -1.0, |v7| +; VI-NEXT: v_mul_f32_e64 v2, -1.0, |v2| +; VI-NEXT: v_mul_f32_e64 v3, -1.0, |v3| ; VI-NEXT: v_med3_f32 v2, v4, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm @@ -480,10 +476,9 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_load_dword v3, v0, s[6:7] glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s2, 0x80000000 -; GFX9-NEXT: v_sub_f32_e64 v1, s2, |v1| -; GFX9-NEXT: v_sub_f32_e64 v2, s2, |v2| -; GFX9-NEXT: v_sub_f32_e64 v3, s2, |v3| +; GFX9-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX9-NEXT: v_max_f32_e64 v2, -|v2|, -|v2| +; GFX9-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX9-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9-NEXT: s_endpgm @@ -499,9 +494,9 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_load_dword v3, v0, s[6:7] glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_sub_f32_e64 v1, 0x80000000, |v1| -; GFX10-NEXT: v_sub_f32_e64 v2, 0x80000000, |v2| -; GFX10-NEXT: v_sub_f32_e64 v3, 0x80000000, |v3| +; GFX10-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX10-NEXT: v_max_f32_e64 v2, -|v2|, -|v2| +; GFX10-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX10-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX10-NEXT: global_store_dword v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm @@ -517,9 +512,9 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_sub_f32_e64 v1, 0x80000000, |v1| -; 
GFX11-NEXT: v_sub_f32_e64 v2, 0x80000000, |v2| -; GFX11-NEXT: v_sub_f32_e64 v3, 0x80000000, |v3| +; GFX11-NEXT: v_max_f32_e64 v1, -|v1|, -|v1| +; GFX11-NEXT: v_max_f32_e64 v2, -|v2|, -|v2| +; GFX11-NEXT: v_max_f32_e64 v3, -|v3|, -|v3| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 ; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll index a6a3237ee929f..d6c675a636e9a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll @@ -22,8 +22,6 @@ define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) { } ; CHECK-LABEL: {{^}}dynamic_shared_array_1: -; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}} -; CHECK: v_lshlrev_b32_e32 {{v[0-9]+}}, 2, {{v[0-9]+}} ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}} ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]] define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index d33155534c284..f4b821ca602c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -819,22 +819,22 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GPRIDX-NEXT: s_mov_b32 s9, 0x40080000 ; GPRIDX-NEXT: s_mov_b32 s8, s18 ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2.0 -; GPRIDX-NEXT: v_mov_b32_e32 v4, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v5, s5 -; GPRIDX-NEXT: v_mov_b32_e32 v6, s6 -; GPRIDX-NEXT: v_mov_b32_e32 v7, s7 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s8 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s9 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s10 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s11 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s12 -; GPRIDX-NEXT: v_mov_b32_e32 
v13, s13 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s14 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s15 -; GPRIDX-NEXT: v_mov_b32_e32 v16, s16 -; GPRIDX-NEXT: v_mov_b32_e32 v17, s17 -; GPRIDX-NEXT: v_mov_b32_e32 v18, s18 -; GPRIDX-NEXT: v_mov_b32_e32 v19, s19 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s16 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s17 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v18, s19 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[16:17], 0, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[4:5], 2, v2 @@ -843,29 +843,29 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 5, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 6, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[14:15], 7, v2 -; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v4, v0, s[16:17] -; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v5, v1, s[16:17] -; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v1, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v8, v0, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v10, v0, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v12, v0, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v14, v0, s[10:11] -; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v16, v0, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v18, v0, s[14:15] -; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v1, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v1, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v1, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v15, v1, s[10:11] -; 
GPRIDX-NEXT: v_cndmask_b32_e64 v15, v17, v1, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v19, v1, s[14:15] -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[16:17] +; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[16:17] +; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[10:11] +; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[14:15] +; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[10:11] +; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[14:15] +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[6:9], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[14:17], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: s_setpc_b64 s[30:31] ; @@ -1022,23 +1022,23 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: v_mov_b32_e32 v17, s15 -; GPRIDX-NEXT: v_mov_b32_e32 v16, s14 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s13 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s12 -; 
GPRIDX-NEXT: v_mov_b32_e32 v13, s11 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s10 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s9 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s8 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s7 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s6 -; GPRIDX-NEXT: v_mov_b32_e32 v7, s5 -; GPRIDX-NEXT: v_mov_b32_e32 v6, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v5, s3 -; GPRIDX-NEXT: v_mov_b32_e32 v4, s2 -; GPRIDX-NEXT: v_mov_b32_e32 v3, s1 -; GPRIDX-NEXT: v_mov_b32_e32 v2, s0 -; GPRIDX-NEXT: v_mov_b32_e32 v1, s18 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v2, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v1, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s18 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 @@ -1047,30 +1047,30 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0 -; GPRIDX-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e32 v2, v4, v1, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v6, v1, s[0:1] -; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v8, v1, s[2:3] -; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v10, v1, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v12, v1, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v14, v1, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v16, v1, s[10:11] -; GPRIDX-NEXT: v_mov_b32_e32 v16, s19 -; GPRIDX-NEXT: 
v_cndmask_b32_e64 v1, v3, v16, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v5, v16, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v7, v16, s[0:1] -; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v16, s[2:3] -; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v16, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v16, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v15, v16, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v17, v16, s[10:11] -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[0:3], off +; GPRIDX-NEXT: v_mov_b32_e32 v0, s19 +; GPRIDX-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v5, v5, v17, s[0:1] +; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[2:3] +; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v17, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v17, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v17, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v17, s[10:11] +; GPRIDX-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[0:1] +; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v0, s[2:3] +; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v0, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v0, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v0, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v0, s[10:11] +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[1:4], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[4:7], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[5:8], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[8:11], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[9:12], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[12:15], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[13:16], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: s_endpgm ; @@ -1444,22 +1444,22 @@ define amdgpu_ps 
void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do ; GPRIDX-NEXT: s_mov_b32 s10, s12 ; GPRIDX-NEXT: s_mov_b32 s12, s14 ; GPRIDX-NEXT: s_mov_b32 s14, s16 -; GPRIDX-NEXT: v_mov_b32_e32 v19, s15 -; GPRIDX-NEXT: v_mov_b32_e32 v18, s14 -; GPRIDX-NEXT: v_mov_b32_e32 v17, s13 -; GPRIDX-NEXT: v_mov_b32_e32 v16, s12 -; GPRIDX-NEXT: v_mov_b32_e32 v15, s11 -; GPRIDX-NEXT: v_mov_b32_e32 v14, s10 -; GPRIDX-NEXT: v_mov_b32_e32 v13, s9 -; GPRIDX-NEXT: v_mov_b32_e32 v12, s8 -; GPRIDX-NEXT: v_mov_b32_e32 v11, s7 -; GPRIDX-NEXT: v_mov_b32_e32 v10, s6 -; GPRIDX-NEXT: v_mov_b32_e32 v9, s5 -; GPRIDX-NEXT: v_mov_b32_e32 v8, s4 -; GPRIDX-NEXT: v_mov_b32_e32 v7, s3 -; GPRIDX-NEXT: v_mov_b32_e32 v6, s2 -; GPRIDX-NEXT: v_mov_b32_e32 v5, s1 -; GPRIDX-NEXT: v_mov_b32_e32 v4, s0 +; GPRIDX-NEXT: v_mov_b32_e32 v18, s15 +; GPRIDX-NEXT: v_mov_b32_e32 v17, s14 +; GPRIDX-NEXT: v_mov_b32_e32 v16, s13 +; GPRIDX-NEXT: v_mov_b32_e32 v15, s12 +; GPRIDX-NEXT: v_mov_b32_e32 v14, s11 +; GPRIDX-NEXT: v_mov_b32_e32 v13, s10 +; GPRIDX-NEXT: v_mov_b32_e32 v12, s9 +; GPRIDX-NEXT: v_mov_b32_e32 v11, s8 +; GPRIDX-NEXT: v_mov_b32_e32 v10, s7 +; GPRIDX-NEXT: v_mov_b32_e32 v9, s6 +; GPRIDX-NEXT: v_mov_b32_e32 v8, s5 +; GPRIDX-NEXT: v_mov_b32_e32 v7, s4 +; GPRIDX-NEXT: v_mov_b32_e32 v6, s3 +; GPRIDX-NEXT: v_mov_b32_e32 v5, s2 +; GPRIDX-NEXT: v_mov_b32_e32 v4, s1 +; GPRIDX-NEXT: v_mov_b32_e32 v3, s0 ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v2 @@ -1468,29 +1468,29 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v2 ; GPRIDX-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v2 -; GPRIDX-NEXT: v_cndmask_b32_e64 v2, v4, v0, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc -; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v5, v1, s[12:13] -; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v7, v1, vcc -; GPRIDX-NEXT: 
v_cndmask_b32_e64 v6, v8, v0, s[0:1] -; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v10, v0, s[2:3] -; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v12, v0, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v14, v0, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v16, v0, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v18, v0, s[10:11] -; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v9, v1, s[0:1] -; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v11, v1, s[2:3] -; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v13, v1, s[4:5] -; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v15, v1, s[6:7] -; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v17, v1, s[8:9] -; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v19, v1, s[10:11] -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[2:5], off +; GPRIDX-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e32 v5, v5, v0, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v4, v4, v1, s[12:13] +; GPRIDX-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc +; GPRIDX-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[0:1] +; GPRIDX-NEXT: v_cndmask_b32_e64 v9, v9, v0, s[2:3] +; GPRIDX-NEXT: v_cndmask_b32_e64 v11, v11, v0, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v13, v13, v0, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v15, v15, v0, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v17, v17, v0, s[10:11] +; GPRIDX-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[0:1] +; GPRIDX-NEXT: v_cndmask_b32_e64 v10, v10, v1, s[2:3] +; GPRIDX-NEXT: v_cndmask_b32_e64 v12, v12, v1, s[4:5] +; GPRIDX-NEXT: v_cndmask_b32_e64 v14, v14, v1, s[6:7] +; GPRIDX-NEXT: v_cndmask_b32_e64 v16, v16, v1, s[8:9] +; GPRIDX-NEXT: v_cndmask_b32_e64 v18, v18, v1, s[10:11] +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[3:6], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[6:9], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[7:10], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[10:13], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[11:14], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) -; GPRIDX-NEXT: global_store_dwordx4 
v[0:1], v[14:17], off +; GPRIDX-NEXT: global_store_dwordx4 v[0:1], v[15:18], off ; GPRIDX-NEXT: s_waitcnt vmcnt(0) ; GPRIDX-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-size.ll deleted file mode 100644 index 208500c28b4b3..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-size.ll +++ /dev/null @@ -1 +0,0 @@ -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %S/../lds-size.ll | FileCheck -check-prefix=ALL -check-prefix=HSA %S/../lds-size.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll deleted file mode 100644 index 1b09c62519127..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll +++ /dev/null @@ -1,4 +0,0 @@ -; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -mattr=+flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll deleted file mode 100644 index 7aea170ed1ef8..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll 
+++ /dev/null @@ -1,4 +0,0 @@ -; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -mattr=+flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %S/../llvm.amdgcn.ds.append.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll deleted file mode 100644 index 449a8ab04ba03..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll +++ /dev/null @@ -1,10 +0,0 @@ -; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL 
%S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll - -; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos. -; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll deleted file mode 100644 index 3dceb31d92721..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll +++ /dev/null @@ -1,6 +0,0 @@ -; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - 
-verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll deleted file mode 100644 index ada1267253714..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck 
-enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll deleted file mode 100644 index 20725da516790..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d 
-mcpu=gfx1100 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll deleted file mode 100644 index ba67a6e6365e4..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll +++ /dev/null @@ -1,2 +0,0 @@ -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll | FileCheck -check-prefixes=GCN %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll | FileCheck -check-prefixes=GCN %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll deleted file mode 100644 index 4193d976afd65..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll diff 
--git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll deleted file mode 100644 index e2c3b625395a7..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll deleted file mode 100644 index 9a6bebd5a31c6..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll -; RUN: llc -march=amdgcn -mcpu=gfx1010 -global-isel -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll -; RUN: llc -march=amdgcn -mcpu=gfx1100 -global-isel -amdgpu-enable-delay-alu=0 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll deleted file mode 100644 index d6d20abcd8aa2..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.init.exec.wave32.ll +++ /dev/null @@ -1,5 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032 %S/../llvm.amdgcn.init.exec.wave32.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032 %S/../llvm.amdgcn.init.exec.wave32.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1064 %S/../llvm.amdgcn.init.exec.wave32.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1064 %S/../llvm.amdgcn.init.exec.wave32.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll deleted file mode 100644 index e644b907824ad..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll +++ /dev/null @@ -1,2 +0,0 @@ -; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../llvm.amdgcn.permlane.ll | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %S/../llvm.amdgcn.permlane.ll -; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %S/../llvm.amdgcn.permlane.ll | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 
%S/../llvm.amdgcn.permlane.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll deleted file mode 100644 index 715f3820787fb..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.vote.ll +++ /dev/null @@ -1,4 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE64 %S/../llvm.amdgcn.wqm.vote.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %S/../llvm.amdgcn.wqm.vote.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %S/../llvm.amdgcn.wqm.vote.ll | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %S/../llvm.amdgcn.wqm.vote.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll deleted file mode 100644 index f1ff3825c5faf..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.trap.ll +++ /dev/null @@ -1,16 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP -enable-var-scope %S/../trap.ll - -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP -enable-var-scope %S/../trap.ll -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP -enable-var-scope %S/../trap.ll -; RUN: llc -global-isel -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 
-mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -enable-var-scope %S/../trap.ll - -; enable trap handler feature -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP -enable-var-scope %S/../trap.ll -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT -enable-var-scope %S/../trap.ll - -; disable trap handler feature -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP -enable-var-scope %S/../trap.ll -; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT -enable-var-scope %S/../trap.ll - -; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../trap.ll 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -enable-var-scope %S/../trap.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/read_register.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/read_register.ll deleted file mode 100644 index 3bd16996f8e33..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/read_register.ll +++ /dev/null @@ -1,2 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %S/../read_register.ll | FileCheck -enable-var-scope %S/../read_register.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll deleted file mode 100644 
index 8d9ab9cada75e..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readcyclecounter.ll +++ /dev/null @@ -1,5 +0,0 @@ -; SI run line skipped since store not yet implemented. -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %S/../readcyclecounter.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=GCN %S/../readcyclecounter.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %S/../readcyclecounter.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %S/../readcyclecounter.ll | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %S/../readcyclecounter.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll deleted file mode 100644 index c6c1a87177fff..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ret.ll +++ /dev/null @@ -1,3 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../ret.ll | FileCheck -check-prefix=GCN %S/../ret.ll -; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../ret.ll | FileCheck -check-prefix=GCN %S/../ret.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll index 346b7d2deb18b..4ddd0c6583104 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -191,131 +191,131 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_mov_b32 s5, -1 ; CHECK-NEXT: 
s_and_b64 s[6:7], s[6:7], s[4:5] ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 -; CHECK-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 ; CHECK-NEXT: s_sub_u32 s4, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 +; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 ; CHECK-NEXT: s_subb_u32 s5, 0, s3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 -; CHECK-NEXT: v_trunc_f32_e32 v2, v2 -; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v0 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; 
CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; 
CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v5, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v6, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 -; 
CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s2, v0 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8 -; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s0, v5 -; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], s1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; CHECK-NEXT: 
v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s2, v1 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v9 +; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, s0, v6 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], s1, v4 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v4, v0, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v5 -; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 -; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v4, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s2, v6 +; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 +; CHECK-NEXT: 
v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_branch .LBB1_3 ; CHECK-NEXT: .LBB1_2: @@ -326,9 +326,8 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 ; CHECK-NEXT: s_sub_i32 s1, 0, s2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index f2ad41481eca4..3fd860ab72e39 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -188,130 +188,130 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_mov_b32 s5, -1 ; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5] ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2 ; CHECK-NEXT: ; %bb.1: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 -; CHECK-NEXT: v_mov_b32_e32 v1, s3 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3 +; CHECK-NEXT: v_mov_b32_e32 v0, s3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3 ; CHECK-NEXT: s_sub_u32 s4, 0, s2 ; CHECK-NEXT: v_mov_b32_e32 v3, s1 -; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2 +; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2 ; CHECK-NEXT: s_subb_u32 s5, 0, s3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0 -; CHECK-NEXT: v_trunc_f32_e32 v2, 
v2 -; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v5, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v7, s4, v0 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 -; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: 
v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v5, s5, v0 -; CHECK-NEXT: v_mul_hi_u32 v6, s4, v0 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v4 -; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: 
v_add_i32_e32 v8, vcc, v8, v10 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v6, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v7, v2, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc -; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0 -; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1 +; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: 
v_add_i32_e32 v4, vcc, v4, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0 -; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0 -; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 -; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, s0, v5 -; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v0, vcc -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], s1, v0 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1 +; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v4, v1 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v6 +; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], s1, v1 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v4 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v1, vcc +; CHECK-NEXT: 
v_cmp_le_u32_e64 s[4:5], s3, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v1, v0, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s2, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s2, v3 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v5, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_branch .LBB1_3 ; CHECK-NEXT: .LBB1_2: @@ -322,9 +322,8 @@ define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) { ; CHECK-NEXT: s_cmp_lg_u32 s1, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2 ; CHECK-NEXT: s_sub_i32 s1, 0, s2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/write_register.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/write_register.ll deleted file mode 100644 index 865b4c1817cdc..0000000000000 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/write_register.ll +++ /dev/null @@ -1,2 +0,0 @@ -; Runs original SDAG test with -global-isel -; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %S/../write_register.ll | FileCheck -enable-var-scope %S/../write_register.ll diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll index 1d1048ada8709..b3be3702b3f52 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -226,10 +226,10 @@ define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 { attributes #0 = { argmemonly nounwind } attributes #1 = { nounwind } ;. -; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ;. 
-; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll index b05054d8a03d5..163bd3ee063fb 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll @@ -928,7 +928,7 @@ attributes #4 = { nounwind sanitize_address } attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. 
-; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx900" } ; AKF_HSA: attributes #[[ATTR3]] = { nounwind } @@ -936,7 +936,7 @@ attributes #5 = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ; AKF_HSA: attributes #[[ATTR5]] = { nounwind sanitize_address } ; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" 
"amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index 61ba99bc16f7d..4acf31b249590 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -642,11 +642,11 @@ attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind } ;. -; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; AKF_HSA: attributes #[[ATTR1]] = { nounwind } ; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" } ;. -; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind 
"amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll index 33ad439b0d977..9d8d4a1064032 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -414,10 +414,10 @@ attributes #1 = { nounwind } ; NOHSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ; NOHSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" } ;. -; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; AKF_CHECK: attributes #[[ATTR1]] = { nounwind } ;. 
-; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } ; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll new file mode 100644 index 0000000000000..a7584ac5cd787 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -0,0 +1,431 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX908 %s +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | 
FileCheck -check-prefix=GFX90A %s +; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX940 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1100 %s + +define float @syncscope_system(float* %addr, float %val) #0 { +; GFX908-LABEL: syncscope_system: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: flat_load_dword v3, v[0:1] +; GFX908-NEXT: s_mov_b64 s[4:5], 0 +; GFX908-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_mov_b32_e32 v4, v3 +; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: buffer_wbinvl1_vol +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB0_1 +; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: v_mov_b32_e32 v0, v3 +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: syncscope_system: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_load_dword v3, v[0:1] +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v5, v3 +; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX90A-NEXT: buffer_wbl2 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: buffer_invl2 +; GFX90A-NEXT: buffer_wbinvl1_vol +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; 
GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB0_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: syncscope_system: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_load_dword v3, v[0:1] +; GFX940-NEXT: s_mov_b64 s[0:1], 0 +; GFX940-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_mov_b32_e32 v5, v3 +; GFX940-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX940-NEXT: buffer_wbl2 sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: buffer_inv sc0 sc1 +; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX940-NEXT: s_cbranch_execnz .LBB0_1 +; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX940-NEXT: v_mov_b32_e32 v0, v3 +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: syncscope_system: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_load_b32 v3, v[0:1] +; GFX1100-NEXT: s_mov_b32 s0, 0 +; GFX1100-NEXT: .LBB0_1: ; %atomicrmw.start +; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v4, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] glc +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: buffer_gl0_inv +; GFX1100-NEXT: buffer_gl1_inv +; 
GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4 +; GFX1100-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1100-NEXT: s_cbranch_execnz .LBB0_1 +; GFX1100-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1100-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1100-NEXT: v_mov_b32_e32 v0, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd float* %addr, float %val seq_cst + ret float %res +} + +define float @syncscope_workgroup_rtn(float* %addr, float %val) #0 { +; GFX908-LABEL: syncscope_workgroup_rtn: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: flat_load_dword v3, v[0:1] +; GFX908-NEXT: s_mov_b64 s[4:5], 0 +; GFX908-NEXT: .LBB1_1: ; %atomicrmw.start +; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_mov_b32_e32 v4, v3 +; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB1_1 +; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: v_mov_b32_e32 v0, v3 +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: syncscope_workgroup_rtn: +; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16) +; GFX90A-NEXT: s_lshl_b32 s4, s4, 16 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1 +; GFX90A-NEXT: ; implicit-def: $vgpr3 +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB1_6 +; GFX90A-NEXT: ; %bb.1: ; %atomicrmw.check.private +; GFX90A-NEXT: s_getreg_b32 
s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16) +; GFX90A-NEXT: s_lshl_b32 s6, s6, 16 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1 +; GFX90A-NEXT: ; implicit-def: $vgpr3 +; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; GFX90A-NEXT: s_cbranch_execz .LBB1_3 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.global +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: global_atomic_add_f32 v3, v[0:1], v2, off glc +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr2 +; GFX90A-NEXT: .LBB1_3: ; %Flow +; GFX90A-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; GFX90A-NEXT: s_cbranch_execz .LBB1_5 +; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.private +; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: buffer_load_dword v3, v0, s[0:3], 0 offen +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_add_f32_e32 v1, v3, v2 +; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: .LBB1_5: ; %Flow1 +; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr2 +; GFX90A-NEXT: .LBB1_6: ; %Flow2 +; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB1_8 +; GFX90A-NEXT: ; %bb.7: ; %atomicrmw.shared +; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: ds_add_rtn_f32 v3, v0, v2 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: .LBB1_8: ; %atomicrmw.phi +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v0, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: syncscope_workgroup_rtn: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_setpc_b64 
s[30:31] +; +; GFX1100-LABEL: syncscope_workgroup_rtn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 glc +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: buffer_gl0_inv +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret float %res +} + +define void @syncscope_workgroup_nortn(float* %addr, float %val) #0 { +; GFX908-LABEL: syncscope_workgroup_nortn: +; GFX908: ; %bb.0: ; %atomicrmw.check.shared +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16) +; GFX908-NEXT: s_lshl_b32 s4, s4, 16 +; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1 +; GFX908-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX908-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB2_3 +; GFX908-NEXT: ; %bb.1: ; %Flow2 +; GFX908-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB2_8 +; GFX908-NEXT: .LBB2_2: ; %atomicrmw.phi +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: s_setpc_b64 s[30:31] +; GFX908-NEXT: .LBB2_3: ; %atomicrmw.check.private +; GFX908-NEXT: s_getreg_b32 s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16) +; GFX908-NEXT: s_lshl_b32 s6, s6, 16 +; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1 +; GFX908-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX908-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; GFX908-NEXT: s_cbranch_execz .LBB2_5 +; GFX908-NEXT: ; %bb.4: ; %atomicrmw.global +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX908-NEXT: ; implicit-def: $vgpr2 +; GFX908-NEXT: .LBB2_5: ; %Flow +; GFX908-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; GFX908-NEXT: s_cbranch_execz .LBB2_7 +; GFX908-NEXT: ; %bb.6: ; %atomicrmw.private +; GFX908-NEXT: v_cmp_ne_u64_e32 
vcc, 0, v[0:1] +; GFX908-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX908-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX908-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX908-NEXT: .LBB2_7: ; %Flow1 +; GFX908-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX908-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX908-NEXT: ; implicit-def: $vgpr2 +; GFX908-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX908-NEXT: s_cbranch_execz .LBB2_2 +; GFX908-NEXT: .LBB2_8: ; %atomicrmw.shared +; GFX908-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX908-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: ds_add_f32 v0, v2 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: syncscope_workgroup_nortn: +; GFX90A: ; %bb.0: ; %atomicrmw.check.shared +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: s_getreg_b32 s4, hwreg(HW_REG_SH_MEM_BASES, 16, 16) +; GFX90A-NEXT: s_lshl_b32 s4, s4, 16 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1 +; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GFX90A-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB2_3 +; GFX90A-NEXT: ; %bb.1: ; %Flow2 +; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB2_8 +; GFX90A-NEXT: .LBB2_2: ; %atomicrmw.phi +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; GFX90A-NEXT: .LBB2_3: ; %atomicrmw.check.private +; GFX90A-NEXT: s_getreg_b32 s6, hwreg(HW_REG_SH_MEM_BASES, 0, 16) +; GFX90A-NEXT: s_lshl_b32 s6, s6, 16 +; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s6, v1 +; GFX90A-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX90A-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; GFX90A-NEXT: s_cbranch_execz .LBB2_5 +; GFX90A-NEXT: ; %bb.4: ; %atomicrmw.global +; GFX90A-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr2 +; GFX90A-NEXT: .LBB2_5: ; %Flow +; GFX90A-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7] +; GFX90A-NEXT: s_cbranch_execz .LBB2_7 +; GFX90A-NEXT: ; %bb.6: ; %atomicrmw.private +; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen +; GFX90A-NEXT: .LBB2_7: ; %Flow1 +; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX90A-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GFX90A-NEXT: ; implicit-def: $vgpr2 +; GFX90A-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5] +; GFX90A-NEXT: s_cbranch_execz .LBB2_2 +; GFX90A-NEXT: .LBB2_8: ; %atomicrmw.shared +; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: ds_add_f32 v0, v2 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_waitcnt vmcnt(0) +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: syncscope_workgroup_nortn: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: syncscope_workgroup_nortn: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_atomic_add_f32 v[0:1], v2 +; GFX1100-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: buffer_gl0_inv +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret void +} + +define float @no_unsafe(float* %addr, 
float %val) { +; GFX908-LABEL: no_unsafe: +; GFX908: ; %bb.0: +; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX908-NEXT: flat_load_dword v3, v[0:1] +; GFX908-NEXT: s_mov_b64 s[4:5], 0 +; GFX908-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_mov_b32_e32 v4, v3 +; GFX908-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX908-NEXT: s_waitcnt lgkmcnt(0) +; GFX908-NEXT: flat_atomic_cmpswap v3, v[0:1], v[3:4] glc +; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, v3, v4 +; GFX908-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX908-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX908-NEXT: s_cbranch_execnz .LBB3_1 +; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX908-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX908-NEXT: v_mov_b32_e32 v0, v3 +; GFX908-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-LABEL: no_unsafe: +; GFX90A: ; %bb.0: +; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-NEXT: flat_load_dword v3, v[0:1] +; GFX90A-NEXT: s_mov_b64 s[4:5], 0 +; GFX90A-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_mov_b32_e32 v5, v3 +; GFX90A-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX90A-NEXT: s_waitcnt lgkmcnt(0) +; GFX90A-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5] +; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5] +; GFX90A-NEXT: s_cbranch_execnz .LBB3_1 +; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX90A-NEXT: v_mov_b32_e32 v0, v3 +; GFX90A-NEXT: s_setpc_b64 s[30:31] +; +; GFX940-LABEL: no_unsafe: +; GFX940: ; %bb.0: +; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX940-NEXT: flat_load_dword v3, v[0:1] +; GFX940-NEXT: s_mov_b64 s[0:1], 0 +; GFX940-NEXT: .LBB3_1: ; 
%atomicrmw.start +; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_mov_b32_e32 v5, v3 +; GFX940-NEXT: v_add_f32_e32 v4, v5, v2 +; GFX940-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NEXT: flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0 +; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5 +; GFX940-NEXT: s_or_b64 s[0:1], vcc, s[0:1] +; GFX940-NEXT: s_andn2_b64 exec, exec, s[0:1] +; GFX940-NEXT: s_cbranch_execnz .LBB3_1 +; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] +; GFX940-NEXT: v_mov_b32_e32 v0, v3 +; GFX940-NEXT: s_setpc_b64 s[30:31] +; +; GFX1100-LABEL: no_unsafe: +; GFX1100: ; %bb.0: +; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_load_b32 v3, v[0:1] +; GFX1100-NEXT: s_mov_b32 s0, 0 +; GFX1100-NEXT: .LBB3_1: ; %atomicrmw.start +; GFX1100-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: v_mov_b32_e32 v4, v3 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_add_f32_e32 v3, v4, v2 +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1100-NEXT: flat_atomic_cmpswap_b32 v3, v[0:1], v[3:4] glc +; GFX1100-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX1100-NEXT: buffer_gl0_inv +; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v4 +; GFX1100-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100-NEXT: s_and_not1_b32 exec_lo, exec_lo, s0 +; GFX1100-NEXT: s_cbranch_execnz .LBB3_1 +; GFX1100-NEXT: ; %bb.2: ; %atomicrmw.end +; GFX1100-NEXT: s_or_b32 exec_lo, exec_lo, s0 +; GFX1100-NEXT: v_mov_b32_e32 v0, v3 +; GFX1100-NEXT: s_setpc_b64 s[30:31] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret float %res +} + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" } diff --git 
a/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll new file mode 100644 index 0000000000000..8c32b6c8f1b0b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/exec-mask-opt-cannot-create-empty-or-backward-segment.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck %s + +define amdgpu_kernel void @cannot_create_empty_or_backwards_segment(i1 %arg, i1 %arg1, i1 %arg2, i1 %arg3, i1 %arg4, i1 %arg5) { +; CHECK-LABEL: cannot_create_empty_or_backwards_segment: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_mov_b64 s[26:27], s[2:3] +; CHECK-NEXT: s_mov_b64 s[24:25], s[0:1] +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: s_add_u32 s24, s24, s7 +; CHECK-NEXT: s_addc_u32 s25, s25, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_bitcmp1_b32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[14:15], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s0, 8 +; CHECK-NEXT: s_cselect_b64 s[8:9], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s0, 16 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s0, 24 +; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT: s_xor_b64 s[2:3], s[6:7], -1 +; CHECK-NEXT: s_bitcmp1_b32 s1, 0 +; CHECK-NEXT: s_cselect_b64 s[10:11], -1, 0 +; CHECK-NEXT: s_bitcmp1_b32 s1, 8 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[14:15] +; CHECK-NEXT: s_cselect_b64 s[12:13], -1, 0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1 +; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3] +; CHECK-NEXT: s_and_b64 s[4:5], exec, s[8:9] +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: s_branch .LBB0_3 +; CHECK-NEXT: .LBB0_1: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[18:19], 0 +; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_mov_b64 s[16:17], -1 +; CHECK-NEXT: s_mov_b64 s[22:23], -1 +; 
CHECK-NEXT: .LBB0_2: ; %Flow7 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_and_b64 vcc, exec, s[22:23] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_12 +; CHECK-NEXT: .LBB0_3: ; %bb7 +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_1 +; CHECK-NEXT: ; %bb.4: ; %bb8 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 vcc, s[2:3] +; CHECK-NEXT: s_cbranch_vccz .LBB0_6 +; CHECK-NEXT: ; %bb.5: ; %bb9 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[18:19], -1 +; CHECK-NEXT: s_mov_b64 s[22:23], s[8:9] +; CHECK-NEXT: s_cbranch_execz .LBB0_7 +; CHECK-NEXT: s_branch .LBB0_8 +; CHECK-NEXT: .LBB0_6: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[16:17], -1 +; CHECK-NEXT: s_mov_b64 s[18:19], 0 +; CHECK-NEXT: s_mov_b64 s[22:23], 0 +; CHECK-NEXT: .LBB0_7: ; %bb10 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[18:19], -1 +; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[22:23], s[12:13] +; CHECK-NEXT: .LBB0_8: ; %Flow9 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_andn2_b64 vcc, exec, s[22:23] +; CHECK-NEXT: s_mov_b64 s[22:23], -1 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 +; CHECK-NEXT: ; %bb.9: ; %bb13 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 vcc, s[4:5] +; CHECK-NEXT: s_cbranch_vccz .LBB0_11 +; CHECK-NEXT: ; %bb.10: ; %bb16 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[16:17], 0 +; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_mov_b64 s[22:23], s[10:11] +; CHECK-NEXT: s_mov_b64 s[18:19], s[16:17] +; CHECK-NEXT: s_branch .LBB0_2 +; CHECK-NEXT: .LBB0_11: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[22:23], -1 +; CHECK-NEXT: s_mov_b64 s[20:21], 0 +; CHECK-NEXT: ; implicit-def: $sgpr16_sgpr17 +; CHECK-NEXT: s_mov_b64 s[18:19], 
s[16:17] +; CHECK-NEXT: s_branch .LBB0_2 +; CHECK-NEXT: .LBB0_12: ; %loop.exit.guard6 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_xor_b64 s[14:15], s[20:21], -1 +; CHECK-NEXT: s_mov_b64 s[20:21], -1 +; CHECK-NEXT: s_and_b64 vcc, exec, s[14:15] +; CHECK-NEXT: s_cbranch_vccz .LBB0_16 +; CHECK-NEXT: ; %bb.13: ; %bb14 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_andn2_b64 vcc, exec, s[14:15] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_15 +; CHECK-NEXT: ; %bb.14: ; %bb15 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:4 +; CHECK-NEXT: buffer_store_dword v1, off, s[24:27], 0 +; CHECK-NEXT: .LBB0_15: ; %Flow +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_mov_b64 s[20:21], 0 +; CHECK-NEXT: .LBB0_16: ; %Flow13 +; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: s_andn2_b64 vcc, exec, s[20:21] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_3 +; CHECK-NEXT: ; %bb.17: ; %loop.exit.guard +; CHECK-NEXT: s_and_b64 vcc, exec, s[16:17] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_22 +; CHECK-NEXT: ; %bb.18: ; %loop.exit.guard5 +; CHECK-NEXT: s_and_b64 vcc, exec, s[18:19] +; CHECK-NEXT: s_cbranch_vccnz .LBB0_22 +; CHECK-NEXT: ; %bb.19: ; %bb17 +; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7] +; CHECK-NEXT: s_cbranch_vccz .LBB0_21 +; CHECK-NEXT: ; %bb.20: ; %bb19 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 +; CHECK-NEXT: s_cbranch_vccz .LBB0_22 +; CHECK-NEXT: .LBB0_21: ; %bb21 +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB0_22: ; %UnifiedUnreachableBlock +bb: + br label %bb6 + +bb6: ; preds = %bb15, %bb14, %bb + br label %bb7 + +bb7: ; preds = %bb16, %bb6 + br i1 %arg2, label %bb8, label %bb20 + +bb8: ; preds = %bb7 + br i1 %arg3, label %bb10, label %bb9 + +bb9: ; preds = %bb8 + br i1 %arg1, label %bb13, label %bb12 + +bb10: ; preds = %bb8 + br i1 %arg5, label %bb11, label %bb12 + +bb11: ; preds = %bb10 + br label %bb13 + +bb12: ; preds = %bb10, %bb9 + unreachable + +bb13: ; preds = %bb11, 
%bb9 + br i1 %arg1, label %bb16, label %bb14 + +bb14: ; preds = %bb13 + br i1 %arg, label %bb15, label %bb6 + +bb15: ; preds = %bb14 + store double 0.000000e+00, ptr addrspace(5) null, align 2147483648 + br label %bb6 + +bb16: ; preds = %bb13 + br i1 %arg4, label %bb17, label %bb7 + +bb17: ; preds = %bb16 + br i1 %arg3, label %bb19, label %bb18 + +bb18: ; preds = %bb17 + ret void + +bb19: ; preds = %bb17 + br i1 %arg, label %bb20, label %bb21 + +bb20: ; preds = %bb19, %bb7 + unreachable + +bb21: ; preds = %bb19 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll index 68bc38bd9f4a0..222a8a26f7f82 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll @@ -6,14 +6,14 @@ ; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 { ; GCN: %mul.i = fmul float %load, 1.500000e+01 -; UNSAFE: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "unsafe-fp-math"="true" } -; UNSAFE: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" } +; UNSAFE: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "unsafe-fp-math"="true" } +; UNSAFE: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" } -; NOINFS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" } -; NOINFS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" } +; NOINFS: attributes #0 = { mustprogress nofree norecurse nosync nounwind 
willreturn memory(none) "no-infs-fp-math"="true" } +; NOINFS: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" } -; NONANS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" } -; NONANS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" } +; NONANS: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-nans-fp-math"="true" } +; NONANS: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" } define float @foo(float %x) #0 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/lds-size.ll b/llvm/test/CodeGen/AMDGPU/lds-size.ll index 4a94a95f081bc..313e4d0e07426 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-size.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-size.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 < %s | FileCheck -check-prefix=ALL -check-prefix=HSA %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=ALL -check-prefix=EG %s ; This test makes sure we do not double count global values when they are diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll index 1ad00dd639385..23792c6df0bc8 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -1,7 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2 declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 @@ -19,18 +18,12 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] -; MIR-LABEL: @lds_atomic_inc_ret_i32 -; MIR: DS_INC_RTN_U32 {{.*}} :: (load store (s32) on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3) define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) store i32 %result, i32 addrspace(1)* %out ret void } -!0 = !{!1} -!1 = distinct !{!1, !2} -!2 = distinct !{!2} - ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32_offset: ; CIVI-DAG: s_mov_b32 m0 ; GFX9-NOT: m0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll index 2da96c4480608..3005437edd73e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll @@ -1,7 +1,11 @@ -; 
RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s +; XUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-SDAG,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-GISEL,GCN-GISEL %s ; GCN-LABEL: {{^}}ds_append_lds: ; GCN: s_load_dword [[PTR:s[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll index 40f20bc795222..59c6549ad6ad9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s +; XUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,GCN-GISEL %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,GCN-SDAG %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,GCN-GISEL %s ; GCN-LABEL: {{^}}ds_consume_lds: ; GCN: s_load_dword [[PTR:s[0-9]+]] @@ -52,9 +56,10 @@ define amdgpu_kernel void @ds_consume_no_fold_offset_si(i32 addrspace(3)* addrsp ; GCN: s_load_dword [[PTR:s[0-9]+]] ; SI: s_bitset1_b32 [[PTR]], 16 -; CIPLUS: s_add_i32 [[PTR]], [[PTR]], 0x10000 +; CIPLUS-SDAG: s_add_i32 [[PTR]], [[PTR]], 0x10000 +; CIPLUS-GISEL: 
s_add_u32 [[PTR]], [[PTR]], 0x10000 -; GCN: s_mov_b32 m0, [[PTR]] +; GCN-SDAG: s_mov_b32 m0, [[PTR]] ; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] @@ -66,8 +71,9 @@ define amdgpu_kernel void @ds_consume_lds_over_max_offset(i32 addrspace(3)* %lds } ; GCN-LABEL: {{^}}ds_consume_lds_vgpr_addr: -; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 -; GCN: s_mov_b32 m0, [[READLANE]] +; GCN-SDAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0 +; GCN-SDAG: s_mov_b32 m0, [[READLANE]] +; GCN-GISEL: v_readfirstlane_b32 m0, v0 ; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll index 8ca8148e88206..d7a4ba9dc5eb2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll @@ -1,13 +1,21 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti 
-verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %s ; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos. 
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -verify-machineinstrs < %s | FileCheck -check-prefix=MIR %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -verify-machineinstrs < %s | FileCheck -check-prefix=MIR %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -verify-machineinstrs < %s | FileCheck -check-prefix=MIR %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -verify-machineinstrs < %s | FileCheck -check-prefix=MIR %s ; Minimum offset diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll index f656af44746fb..f87a3eaad63a9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll @@ -1,9 +1,15 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope 
-check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %s ; Minimum offset 
; GCN-LABEL: {{^}}gws_init_offset0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll index 18f187a0bb71b..da64f7350a921 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll @@ -1,9 +1,15 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s ; GCN-LABEL: {{^}}gws_sema_br_offset0: ; NOLOOP-DAG: s_load_{{dword|b32}} [[BAR_NUM:s[0-9]+]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll index 14c6a478d8a52..215c394409ac0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll @@ -1,9 +1,15 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s -; RUN: llc 
-mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,LOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,NOLOOP %s ; GCN-LABEL: {{^}}gws_sema_v_offset0: ; NOLOOP-DAG: s_mov_b32 m0, 0{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll index 
4296455a018db..6a9d10fbfb3da 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll @@ -1,5 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s ; GCN-LABEL: {{^}}ds_ordered_add: ; GCN-DAG: v_{{(dual_)?}}mov_b32{{(_e32)?}} v[[INCR:[0-9]+]], 31 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll index 4009c5a63449f..76bd2270a47bf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=0 
-march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s ; FUNC-LABEL: {{^}}ds_ordered_add: ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll index 79933d2159d82..76266919b5ac9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GCN,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s ; FUNC-LABEL: {{^}}ds_ordered_swap: ; GCN: s_mov_b32 m0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll index aaf73ea8d6c99..f7f1f96f4d3da 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll @@ -1,6 +1,9 @@ -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck 
-check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}full_mask: ; GCN: s_mov_b64 exec, -1 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll index 1355274ae9ead..4098b2b337232 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll @@ -1,7 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck 
-check-prefixes=GCN,GFX1032 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1064 %s ; GCN-LABEL: {{^}}test_init_exec: ; GFX1032: s_mov_b32 exec_lo, 0x12345 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll index 862dfe7154fd3..ca48ce8a08c4a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll @@ -1,5 +1,7 @@ -; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s -; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s +; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s +; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s +; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s +; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #1 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #1 @@ -27,9 +29,8 @@ 
define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src ; GCN-LABEL: {{^}}v_permlane16_b32_vll: ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 -; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1 ; GFX10PLUS-NOT: v_readfirstlane_b32 -; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} +; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) store i32 %v, i32 addrspace(1)* %out @@ -124,9 +125,8 @@ define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %sr ; GCN-LABEL: {{^}}v_permlanex16_b32_vll: ; FIXME-GFX10PLUS: It is allowed to have both immediates as literals ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 -; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1 ; GFX10PLUS-NOT: v_readfirstlane_b32 -; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} +; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) store i32 %v, i32 addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll index db6ee7bd0aeb6..929f935f69108 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll @@ -7,7 +7,9 @@ declare double @llvm.sqrt.f64(double) #0 declare float @llvm.sqrt.f32(float) #0 ; FUNC-LABEL: {{^}}rcp_undef_f32: -; SI-NOT: v_rcp_f32 +; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000 +; SI-NOT: [[NAN]] +; SI: buffer_store_dword [[NAN]] define amdgpu_kernel void @rcp_undef_f32(float 
addrspace(1)* %out) #1 { %rcp = call float @llvm.amdgcn.rcp.f32(float undef) store float %rcp, float addrspace(1)* %out, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll index c04ab319cc8c5..e6b6bd62edf4c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll @@ -12,7 +12,7 @@ define amdgpu_ps float @test1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -33,7 +33,7 @@ define amdgpu_ps float @test2(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v0, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -58,7 +58,7 @@ define amdgpu_ps float @test_softwqm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: v_add_f32_e32 v1, v1, v2 ; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 idxen ; CHECK-NEXT: v_add_f32_e32 v0, v1, v1 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ; return to shader part epilog main_body: @@ -124,7 +124,7 @@ define amdgpu_ps float @test_wwm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: 
v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -156,7 +156,7 @@ define amdgpu_ps float @test_strict_wwm1(i32 inreg %idx0, i32 inreg %idx1) { ; CHECK-NEXT: s_mov_b64 exec, s[2:3] ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 -; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; CHECK-NEXT: ; return to shader part epilog main_body: %src0 = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i32 0, i32 0) @@ -191,7 +191,7 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v2, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; CHECK-NEXT: .LBB6_4: ; %END ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: v_mov_b32_e32 v0, v2 @@ -246,7 +246,7 @@ define amdgpu_ps float @test_control_flow_1(<8 x i32> inreg %rsrc, <4 x i32> inr ; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v2, v0, v1 -; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec +; CHECK-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; CHECK-NEXT: .LBB7_4: ; %END ; CHECK-NEXT: s_or_b64 exec, exec, s[0:1] ; CHECK-NEXT: s_and_b64 exec, exec, s[14:15] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll index 768165368fb03..182275b687a68 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll @@ -1,6 +1,9 @@ -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE64 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CHECK,WAVE32 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE64 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE64 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,WAVE32 %s ;CHECK-LABEL: {{^}}ret: ;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll index 0917022f84080..f46aa7736108c 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope 
--check-prefixes=CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,SDAG-GFX9 %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s + +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GISEL-GFX9 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %src1, half %src2) #0 { ; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: @@ -10,23 +14,45 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo(half %src0, half %s ; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: 
v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -45,25 +71,51 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo(half %src0, half %s ; GFX9-NEXT: v_mov_b32_e32 v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: 
v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_or_b32_e32 v0, 0x3c00, v0 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: v_mov_b32_e32 v0, 1.0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_or_b32_e32 v0, 0x3c00, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SDAG-CI-NEXT: v_mov_b32_e32 v0, 1.0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: s_movk_i32 s4, 0x3c00 +; GISEL-VI-NEXT: s_bfe_u32 s4, s4, 0x100000 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_e32 v0, s4, v0 +; GISEL-VI-NEXT: s_setpc_b64 
s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_constlo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3c00 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -81,25 +133,49 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src ; GFX9-NEXT: v_mov_b32_e32 v0, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: v_mov_b32_e32 v0, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SDAG-CI-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -111,30 +187,62 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_reglo(half %src0, half %src } define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, half %src2) #0 { -; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; SDAG-GFX9: ; %bb.0: +; SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; GISEL-GFX9: ; %bb.0: +; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 
+; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -147,30 +255,62 @@ define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack(half %src0, half %src1, ha } define i32 @v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext(half %src0, half %src1, half %src2) #0 { -; GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; SDAG-GFX9: ; %bb.0: +; 
SDAG-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX9-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; GISEL-GFX9: ; %bb.0: +; GISEL-GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX9-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-GFX9-NEXT: v_lshlrev_b32_sdwa v0, v1, sext(v0) dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_intpack_sext: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -190,23 +330,45 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half % ; GFX9-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: -; CI: ; %bb.0: -; 
CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half 
%src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -225,23 +387,53 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt(half ; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; 
GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -263,30 +455,67 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi ; GFX9-NEXT: v_mad_mixhi_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; VI-NEXT: flat_store_short v[0:1], v0 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: s_setpc_b64 s[30:31] -; -; 
CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: s_mov_b32 s7, 0xf000 -; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 -; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-VI-NEXT: flat_store_short v[0:1], v0 +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) +; SDAG-VI-NEXT: v_max_f16_sdwa v0, v0, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: s_mov_b32 s7, 0xf000 +; SDAG-CI-NEXT: s_mov_b32 s6, -1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp +; SDAG-CI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: flat_store_short v[0:1], v0 +; GISEL-VI-NEXT: 
s_waitcnt vmcnt(0) +; GISEL-VI-NEXT: v_max_f16_e64 v0, v0, v0 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v1, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi_use: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: s_mov_b32 s6, -1 +; GISEL-CI-NEXT: s_mov_b32 s7, 0xf000 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-CI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) +; GISEL-CI-NEXT: v_max_f32_e32 v1, v2, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -308,3 +537,6 @@ declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) # attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { nounwind readnone speculatable } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CI: {{.*}} +; VI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll index 451cc98f7ada9..004f9abdee8dc 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -1,22 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX906 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX900 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s + +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s define half @mixlo_simple(float %src0, float %src1, float %src2) #0 { -; GFX906-LABEL: mixlo_simple: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: mixlo_simple: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: mixlo_simple: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: mixlo_simple: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -24,31 +29,38 @@ define half @mixlo_simple(float %src0, float %src1, float %src2) #0 { ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: mixlo_simple: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: mixlo_simple: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: mixlo_simple: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2) %cvt.result = fptrunc float %result to half ret half %cvt.result } define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { -; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: 
v_mad_mixlo_f16_f16lo_f16lo_f16lo: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -59,13 +71,23 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -75,18 +97,18 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f16lo(half %src0, half %src1, half %src } define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 { -; GFX906-LABEL: 
v_mad_mixlo_f16_f16lo_f16lo_f32: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -96,13 +118,22 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2 ; VI-NEXT: v_cvt_f16_f32_e32 v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float 
%src1.ext, float %src2) @@ -111,18 +142,18 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32(half %src0, half %src1, float %src2 } define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %src1, float %src2) #0 { -; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_mad_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mixlo_f16 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -132,13 +163,30 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr ; VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mac_f32_e32 v2, v0, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v2 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -149,13 +197,6 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_post_cvt(half %src0, half %sr } define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src1, float %src2) #0 { -; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp -; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; ; GFX900-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -163,6 +204,13 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; +; GFX906-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] clamp +; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX906-NEXT: s_setpc_b64 s[30:31] +; ; VI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -172,13 +220,22 
@@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src ; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -192,6 +249,14 @@ define half @v_mad_mixlo_f16_f16lo_f16lo_f32_clamp_pre_cvt(half %src0, half %src ; operation only clobbers relevant lane. 
define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { +; GFX900-LABEL: v_mad_mix_v2f32: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GFX900-NEXT: v_mov_b32_e32 v0, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; ; GFX906-LABEL: v_mad_mix_v2f32: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -200,52 +265,77 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half ; GFX906-NEXT: v_mov_b32_e32 v0, v3 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, 
v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v5, v3, v4 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v1, v2 -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_v2f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; 
CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v4, v0, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v4 -; CI-NEXT: v_mac_f32_e32 v5, v1, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-VI-LABEL: v_mad_mix_v2f32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x 
half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -255,76 +345,138 @@ define <2 x half> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half } define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v3f32: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_mov_b32_e32 v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v3f32: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v3f32: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v3f32: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v3f32: -; VI: ; %bb.0: -; VI-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mac_f32_e32 v8, v6, v7 -; VI-NEXT: v_mac_f32_e32 v4, v0, v2 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_mac_f32_e32 v5, v1, v3 -; VI-NEXT: v_cvt_f16_f32_e32 v2, v4 -; VI-NEXT: v_cvt_f16_f32_e32 v1, v5 -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v3f32: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_mac_f32_e32 v8, v6, v7 +; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v2, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v3f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mac_f32_e32 v8, v2, v5 +; SDAG-CI-NEXT: v_mac_f32_e32 v7, v1, v4 +; SDAG-CI-NEXT: v_mac_f32_e32 v6, v0, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v3f32: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v3f32: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 
op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v3f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_mac_f32_e32 v7, v1, v4 -; CI-NEXT: v_mac_f32_e32 v6, v0, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v6 -; CI-NEXT: v_mac_f32_e32 v8, v2, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-VI-LABEL: v_mad_mix_v3f32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7 +; 
GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v8 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v5 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v3f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_mac_f32_e32 v6, v0, v3 +; GISEL-CI-NEXT: v_mac_f32_e32 v7, v1, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v8, v2, v5 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v6 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v7 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v8 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <3 x half> %src0 to <3 x float> %src1.ext = fpext <3 x half> %src1 to <3 x float> %src2.ext = fpext <3 x half> %src2 to <3 x float> @@ -334,95 +486,172 @@ define <3 x half> @v_mad_mix_v3f32(<3 x half> %src0, <3 x half> %src1, <3 x half } define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v4f32: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; 
GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_mov_b32_e32 v0, v7 -; GFX906-NEXT: v_mov_b32_e32 v1, v6 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v4f32: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v7 +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v6 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v4f32: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v0, v7 -; GFX900-NEXT: v_mov_b32_e32 v1, v6 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v4f32: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v6, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v7, v0, v2, v4 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v7, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v7 +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v6 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v4f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_mac_f32_e32 v10, v6, v8 -; VI-NEXT: v_mac_f32_e32 v11, v7, v9 -; VI-NEXT: v_mac_f32_e32 v5, v1, v3 -; VI-NEXT: v_mac_f32_e32 v4, v0, v2 -; VI-NEXT: v_cvt_f16_f32_sdwa v1, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v2, v5 -; VI-NEXT: v_cvt_f16_f32_e32 v3, v4 -; VI-NEXT: v_or_b32_e32 v1, v2, v1 -; VI-NEXT: v_or_b32_e32 v0, v3, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v4f32: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, 
v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_mac_f32_e32 v10, v7, v9 +; SDAG-VI-NEXT: v_mac_f32_e32 v11, v6, v8 +; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v2, v4 +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v3, v5 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0 +; SDAG-VI-NEXT: v_or_b32_e32 v1, v3, v1 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v4f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mac_f32_e32 v11, v3, v7 +; SDAG-CI-NEXT: v_mac_f32_e32 v10, v2, v6 +; SDAG-CI-NEXT: v_mac_f32_e32 v9, v1, v5 +; SDAG-CI-NEXT: v_mac_f32_e32 v8, 
v0, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v4f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_mac_f32_e32 v10, v2, v6 -; CI-NEXT: v_mac_f32_e32 v9, v1, v5 -; CI-NEXT: v_mac_f32_e32 v8, v0, v4 -; CI-NEXT: v_mac_f32_e32 v11, v3, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX900-LABEL: v_mad_mix_v4f32: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GISEL-GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, v7 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v4f32: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v7, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v7, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v6 +; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, v7 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v4f32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: 
v_mac_f32_e32 v4, v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8 +; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v4 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v10 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v5 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v11 +; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v4f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GISEL-CI-NEXT: v_mac_f32_e32 v8, v0, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v9, v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v10, v2, v6 +; GISEL-CI-NEXT: v_mac_f32_e32 v11, v3, v7 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v8 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v9 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v10 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v11 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <4 x half> %src0 to <4 x float> %src1.ext = fpext <4 x half> %src1 to <4 x float> %src2.ext = 
fpext <4 x half> %src2 to <4 x float> @@ -434,6 +663,14 @@ define <4 x half> @v_mad_mix_v4f32(<4 x half> %src0, <4 x half> %src1, <4 x half ; FIXME (DAG): Fold clamp define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { +; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mov_b32_e32 v0, v3 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; ; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -442,52 +679,91 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s ; GFX906-NEXT: v_mov_b32_e32 v0, v3 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp +; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v5, v3, v4 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v5, 
v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mac_f32_e32 v5, v1, v3 -; CI-NEXT: v_mac_f32_e32 v4, v0, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v5 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v4 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v2 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -499,82 +775,167 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt(<2 x half> %src0, <2 x half> %s } ; FIXME (DAG): Should be packed into 2 registers per argument? 
+; FIXME (GIsel): V_PK_MAX clamp could be folded into mixlo define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_pack_b32_f16 v1, v1, 0 -; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp -; GFX906-NEXT: v_mov_b32_e32 v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, 0 +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_pack_b32_f16 v1, v1, 0 -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, 
v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, 0 +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v3f32_clamp_postcvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mac_f32_e32 v8, v6, v7 -; VI-NEXT: v_mac_f32_e32 v4, v0, v2 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_mac_f32_e32 v5, v1, v3 -; VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp -; VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_mac_f32_e32 v8, v6, v7 +; SDAG-VI-NEXT: 
v_mac_f32_e32 v4, v0, v2 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v8 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v5 clamp +; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_mac_f32_e32 v6, v0, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v7, v1, v4 +; SDAG-CI-NEXT: v_mac_f32_e32 v8, v2, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v2, v2 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, 
v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v3f32_clamp_postcvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v8, v2, v5 -; CI-NEXT: v_mac_f32_e32 v6, v0, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v8 -; CI-NEXT: v_mac_f32_e32 v7, v1, v4 -; CI-NEXT: v_cvt_f32_f16_e64 v2, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v7 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v6 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v1, v1, v3, v5 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_pk_max_f16 v1, v1, v1 clamp +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v8, v6, v7 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v4 clamp +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v8 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v5 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_bfe_u32 v1, v2, 0, 16 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_postcvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_mac_f32_e32 v6, v0, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v6 +; GISEL-CI-NEXT: v_mac_f32_e32 v7, v1, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v8, v2, v5 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v7 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v8 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 
+; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_max_f32_e32 v2, v3, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v3 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v3 +; GISEL-CI-NEXT: v_min_f32_e32 v2, v2, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <3 x half> %src0 to <3 x float> %src1.ext = fpext <3 x half> %src1 to <3 x float> %src2.ext = fpext <3 x half> %src2 to <3 x float> @@ -586,6 +947,17 @@ define <3 x half> @v_mad_mix_v3f32_clamp_postcvt(<3 x half> %src0, <3 x half> %s } define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 { +; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GFX900-NEXT: v_mov_b32_e32 v0, v6 +; GFX900-NEXT: v_mov_b32_e32 v1, v2 +; GFX900-NEXT: s_setpc_b64 s[30:31] +; ; GFX906-LABEL: v_mad_mix_v4f32_clamp_postcvt: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -597,84 +969,154 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s ; 
GFX906-NEXT: v_mov_b32_e32 v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixhi_f16 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixlo_f16 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixhi_f16 v2, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mov_b32_e32 v0, v6 -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v10, v7, v9 +; SDAG-VI-NEXT: v_mac_f32_e32 v11, v6, v8 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v11 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v10 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp +; 
SDAG-VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp +; SDAG-VI-NEXT: v_or_b32_e32 v0, v2, v0 +; SDAG-VI-NEXT: v_or_b32_e32 v1, v3, v1 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mac_f32_e32 v10, v6, v8 -; VI-NEXT: v_mac_f32_e32 v11, v7, v9 -; VI-NEXT: v_mac_f32_e32 v4, v0, v2 -; VI-NEXT: v_mac_f32_e32 v5, v1, v3 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v10 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_sdwa v1, v11 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e64 v2, v4 clamp -; VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp -; VI-NEXT: v_or_b32_e32 v0, v2, v0 -; VI-NEXT: v_or_b32_e32 v1, v3, v1 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, 
v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v8, v0, v4 +; SDAG-CI-NEXT: v_mac_f32_e32 v9, v1, v5 +; SDAG-CI-NEXT: v_mac_f32_e32 v10, v2, v6 +; SDAG-CI-NEXT: v_mac_f32_e32 v11, v3, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v2, v2 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v3, v3 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: 
v_cvt_f32_f16_e32 v9, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v10, v6, v8 +; GISEL-VI-NEXT: v_mac_f32_e32 v11, v7, v9 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v1, v4 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v0, v10 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v3, v5 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e64 v2, v11 clamp +; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v4f32_clamp_postcvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: 
v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v11, v3, v7 -; CI-NEXT: v_mac_f32_e32 v8, v0, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v10, v2, v6 -; CI-NEXT: v_cvt_f32_f16_e64 v3, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v10 -; CI-NEXT: v_mac_f32_e32 v9, v1, v5 -; CI-NEXT: v_cvt_f32_f16_e64 v2, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v9 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v0 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v8 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_postcvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GISEL-CI-NEXT: v_mac_f32_e32 v8, v0, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v9, v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v10, v2, v6 +; GISEL-CI-NEXT: v_mac_f32_e32 v11, v3, v7 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v8 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v9 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v10 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v4, v11 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_max_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: 
v_max_f32_e32 v3, v3, v2 +; GISEL-CI-NEXT: v_max_f32_e32 v2, v4, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v2 +; GISEL-CI-NEXT: v_min_f32_e32 v0, v0, v5 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v5 +; GISEL-CI-NEXT: v_min_f32_e32 v2, v3, v5 +; GISEL-CI-NEXT: v_min_f32_e32 v3, v4, v5 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <4 x half> %src0 to <4 x float> %src1.ext = fpext <4 x half> %src1 to <4 x float> %src2.ext = fpext <4 x half> %src2 to <4 x float> @@ -685,61 +1127,140 @@ define <4 x half> @v_mad_mix_v4f32_clamp_postcvt(<4 x half> %src0, <4 x half> %s ret <4 x half> %clamp } +; FIXME (GISel): Packed Vectors handling isn't great for now, so we don't end up with +; a build_vector to select the mixhi. Issue is more specifically with how insert_vector_elt is being +; legalized (bitwise ops instead of shuffle/build_vector for instance). 
define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_mov_b32_e32 v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v5, v3, v4 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e64 v1, v2 clamp +; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; SDAG-CI-NEXT: 
v_cvt_f16_f32_e32 v1, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v4, v3 +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_max_f16_e64 v0, v3, v3 clamp +; GISEL-GFX900-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff0000 +; GISEL-GFX900-NEXT: v_and_or_b32 v0, v4, v1, v0 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mac_f32_e32 v5, v1, v3 -; CI-NEXT: v_mac_f32_e32 v4, v0, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v4 -; CI-NEXT: v_cvt_f32_f16_e64 v0, v0 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v4, v3 +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_max_f16_e64 v0, v3, v3 clamp +; GISEL-GFX906-NEXT: v_bfe_u32 v0, 
v0, 0, 16 +; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff0000 +; GISEL-GFX906-NEXT: v_and_or_b32 v0, v4, v1, v0 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v2 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_max_f16_e64 v1, v0, v0 clamp +; GISEL-VI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16 
+; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v0 +; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -753,60 +1274,139 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_lo(<2 x half> %src0, <2 x half> } define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_mov_b32_e32 v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] -; 
GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mov_b32_e32 v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mac_f32_e32 v5, v3, v4 -; VI-NEXT: v_mac_f32_e32 v2, v0, v1 -; VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v1, v2 -; VI-NEXT: v_or_b32_e32 v0, v1, v0 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; SDAG-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v0, v5 clamp 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v2 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v1, v0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, 16 +; GISEL-GFX900-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff +; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v1, v0 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, 
v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, 16 +; GISEL-GFX906-NEXT: v_lshlrev_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff +; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v1, v0 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mac_f32_e32 v4, v0, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v4 -; CI-NEXT: v_mac_f32_e32 v5, v1, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v1, v1 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v5, v3, v4 +; GISEL-VI-NEXT: v_mac_f32_e32 v2, v0, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 
v0, v2 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_max_f16_sdwa v1, v0, v0 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_postcvt_hi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_mac_f32_e32 v5, v1, v3 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v4, v0, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v4 +; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 0 +; GISEL-CI-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GISEL-CI-NEXT: v_max_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, 1.0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_min_f32_e32 v1, v1, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, 
v1 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -822,64 +1422,117 @@ define <2 x half> @v_mad_mix_v2f32_clamp_postcvt_hi(<2 x half> %src0, <2 x half> ; FIXME (DAG): Should be able to use mixlo/mixhi define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3 -; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX906-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3 -; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX900-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v1 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v2f32_clamp_precvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp -; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp -; VI-NEXT: v_cvt_f16_f32_sdwa v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v2f32_clamp_precvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v1 +; SDAG-VI-NEXT: s_setpc_b64 
s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_clamp_precvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_clamp_precvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_clamp_precvt: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; 
GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v3 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_precvt: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v3 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v1, v0 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_precvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mad_f32 v3, v3, v4, v5 clamp +; GISEL-VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v3 +; GISEL-VI-NEXT: v_mov_b32_e32 v2, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_clamp_precvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -893,82 +1546,150 @@ define <2 x half> @v_mad_mix_v2f32_clamp_precvt(<2 x half> %src0, <2 x half> %sr ; FIXME (DAG): Handling undef 4th component define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %src1, <3 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_cvt_f16_f32_e32 v2, v3 -; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX906-NEXT: v_pack_b32_f16 v0, v0, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: 
v_mad_mix_v3f32_clamp_precvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v3 -; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX900-NEXT: v_pack_b32_f16 v0, v0, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v3f32_clamp_precvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp -; VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp -; VI-NEXT: v_cvt_f16_f32_sdwa v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp -; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; 
VI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; VI-NEXT: v_or_b32_e32 v0, v0, v2 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v3f32_clamp_precvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v3f32_clamp_precvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: 
v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp +; SDAG-CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp +; SDAG-CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v3f32_clamp_precvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp -; CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-GFX900-LABEL: v_mad_mix_v3f32_clamp_precvt: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: 
v_cvt_f16_f32_e32 v2, v6 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v2, v0 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v3f32_clamp_precvt: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v2, v0 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v3f32_clamp_precvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; GISEL-VI-NEXT: v_mad_f32 v6, v6, v7, v8 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v2, v6 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-VI-NEXT: v_mov_b32_e32 v3, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_bfe_u32 v1, v1, 0, 16 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v3f32_clamp_precvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v3, v6 clamp +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v4, v7 clamp +; GISEL-CI-NEXT: v_mad_f32 v2, v2, v5, v8 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <3 x half> %src0 to <3 x float> %src1.ext = fpext <3 x half> %src1 to <3 x float> %src2.ext = fpext <3 x half> %src2 to <3 x float> @@ -980,103 +1701,188 @@ define <3 x half> @v_mad_mix_v3f32_clamp_precvt(<3 x half> %src0, <3 x half> %sr } define <4 x half> @v_mad_mix_v4f32_clamp_precvt(<4 x half> %src0, <4 x half> %src1, <4 x half> %src2) #0 { -; GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6 -; GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX906-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 
-; GFX906-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX906-NEXT: v_pack_b32_f16 v0, v0, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-GFX900-NEXT: v_pack_b32_f16 v0, v0, v3 +; SDAG-GFX900-NEXT: v_pack_b32_f16 v1, v1, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] ; -; GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp -; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v6 -; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 -; GFX900-NEXT: v_cvt_f16_f32_e32 v3, v3 -; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX900-NEXT: v_pack_b32_f16 v1, v1, v2 -; GFX900-NEXT: v_pack_b32_f16 v0, v0, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mix_f32 v6, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v2, v4 op_sel:[1,1,1] 
op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v6 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-GFX906-NEXT: v_pack_b32_f16 v0, v0, v3 +; SDAG-GFX906-NEXT: v_pack_b32_f16 v1, v1, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_v4f32_clamp_precvt: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; VI-NEXT: v_cvt_f32_f16_sdwa v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_sdwa v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; VI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; VI-NEXT: v_mad_f32 v6, v6, v8, v10 clamp -; VI-NEXT: v_mad_f32 v7, v7, v9, v11 clamp -; VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp -; VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp -; VI-NEXT: v_cvt_f16_f32_sdwa v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_sdwa v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-NEXT: v_or_b32_e32 v1, v1, v2 -; VI-NEXT: v_or_b32_e32 v0, v0, v3 -; VI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_v4f32_clamp_precvt: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v6, v0 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-VI-NEXT: v_mad_f32 v7, v7, v9, v10 clamp +; SDAG-VI-NEXT: v_mad_f32 v6, v6, v8, v11 clamp +; SDAG-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v2, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_sdwa v3, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-VI-NEXT: v_or_b32_e32 v0, v0, v3 +; SDAG-VI-NEXT: v_or_b32_e32 v1, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v4f32_clamp_precvt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 
v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp +; SDAG-CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp +; SDAG-CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp +; SDAG-CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v4f32_clamp_precvt: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v3, v6 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-GFX900-NEXT: v_pack_b32_f16 v0, v3, v0 +; GISEL-GFX900-NEXT: v_pack_b32_f16 v1, v2, v1 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v4f32_clamp_precvt: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mix_f32 v6, v0, v2, v4 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v2, v4 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v1, v3, v5 op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v1, v3, v5 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v3, v6 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-GFX906-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-GFX906-NEXT: v_pack_b32_f16 v0, v3, v0 +; GISEL-GFX906-NEXT: v_pack_b32_f16 v1, v2, v1 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v4f32_clamp_precvt: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v8, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v9, v3 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v10, v4 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v11, v5 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v5, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mad_f32 v6, v6, v8, v10 clamp +; GISEL-VI-NEXT: v_mad_f32 v0, v0, v2, v4 clamp +; GISEL-VI-NEXT: v_mad_f32 v2, v7, v9, v11 clamp +; GISEL-VI-NEXT: v_mad_f32 v1, v1, v3, v5 clamp +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f16_f32_e32 v3, v6 +; GISEL-VI-NEXT: 
v_cvt_f16_f32_e32 v2, v2 +; GISEL-VI-NEXT: v_mov_b32_e32 v4, 16 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GISEL-VI-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v4f32_clamp_precvt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v11, v11 -; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v10, v10 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v9, v9 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v8, v8 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 -; CI-NEXT: v_cvt_f32_f16_e32 v7, v7 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 -; CI-NEXT: v_cvt_f32_f16_e32 v6, v6 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp -; CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp -; CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp -; CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; 
CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; GISEL-CI-LABEL: v_mad_mix_v4f32_clamp_precvt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v5 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v6 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v7 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v8, v8 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v9, v9 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v10, v10 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v11, v11 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v4, v8 clamp +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v5, v9 clamp +; GISEL-CI-NEXT: v_mad_f32 v2, v2, v6, v10 clamp +; GISEL-CI-NEXT: v_mad_f32 v3, v3, v7, v11 clamp +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <4 x half> %src0 to <4 x float> %src1.ext = fpext <4 x half> %src1 to <4 x float> %src2.ext = fpext <4 x half> %src2 to <4 x float> @@ -1114,3 +1920,5 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) # attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } attributes #1 = { nounwind readnone speculatable } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll index 3a6c1f1850ad0..b3b8807fea05f 100644 --- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll @@ -1,8 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900 %s -; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s -; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CIVI,VI %s -; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CIVI,CI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s +; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s +; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s + +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s +; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { ; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: @@ -26,11 +31,20 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1 ; 
VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -109,11 +123,20 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> % ; VI-NEXT: v_mac_f32_e32 v0, v3, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v1, v3, v5 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.hi = extractelement <2 x half> %src0, i32 1 %src1.hi = extractelement <2 x half> %src1, i32 1 %src2.hi = extractelement <2 x half> %src2, i32 1 @@ -125,54 +148,96 @@ define float 
@v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> % } define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { -; GFX900-LABEL: v_mad_mix_v2f32: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX900-NEXT: v_mov_b32_e32 v1, v3 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_v2f32: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] -; GFX906-NEXT: v_mov_b32_e32 v1, v3 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_v2f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v4, v0 -; VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v6, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v2 -; VI-NEXT: v_mac_f32_e32 v1, v3, v5 -; VI-NEXT: v_mac_f32_e32 v0, v4, v6 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_v2f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v5, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v6 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v6, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v6 -; CI-NEXT: v_mac_f32_e32 v3, v1, v5 -; 
CI-NEXT: v_mov_b32_e32 v1, v3 -; CI-NEXT: v_mac_f32_e32 v0, v4, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_v2f32: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_v2f32: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v6, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v1, v3, v5 +; SDAG-VI-NEXT: v_mac_f32_e32 v0, v4, v6 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v6 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: 
v_cvt_f32_f16_e32 v0, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v6 +; SDAG-CI-NEXT: v_mac_f32_e32 v3, v1, v5 +; SDAG-CI-NEXT: v_mov_b32_e32 v1, v3 +; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v5 +; GISEL-VI-NEXT: v_mac_f32_e32 v1, v4, v6 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 
v0, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v5 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v6, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v1, v7, v3 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2.ext = fpext <2 x half> %src2 to <2 x float> @@ -210,22 +275,42 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, ; VI-NEXT: v_mov_b32_e32 v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_v2f32_shuffle: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v4, v5 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v5, v1 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v4 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v5 -; CI-NEXT: v_cvt_f32_f16_e32 v5, v0 -; CI-NEXT: v_mad_f32 v0, v4, v2, v1 -; CI-NEXT: v_mac_f32_e32 v1, v5, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v5 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v1 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v4 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v5 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v0 +; SDAG-CI-NEXT: v_mad_f32 v0, v4, v2, v1 +; SDAG-CI-NEXT: v_mac_f32_e32 v1, v5, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, 
v0 +; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v5 +; GISEL-CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 +; GISEL-CI-NEXT: v_or_b32_e32 v1, v1, v4 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3 +; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1 +; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> @@ -249,20 +334,38 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, 
v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -293,11 +396,20 @@ define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %s ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -328,11 +440,20 @@ define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, 
half ; VI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -363,11 +484,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -395,11 
+524,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %sr ; VI-NEXT: v_mad_f32 v0, v0, v1, -v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, -v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.neg = fneg float %src2 @@ -428,11 +565,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %sr ; VI-NEXT: v_mad_f32 v0, v0, v1, |v2| ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, |v2| -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, |v2| +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, |v2| +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.abs = call float @llvm.fabs.f32(float %src2) @@ -461,11 +606,19 @@ 
define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float ; VI-NEXT: v_mad_f32 v0, v0, v1, -|v2| ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, -|v2| -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2| +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2| +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.abs = call float @llvm.fabs.f32(float %src2) @@ -479,19 +632,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float ; inline immediate. 
define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { -; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 1.0 -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 1.0 -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: ; VI: ; %bb.0: @@ -501,11 +654,33 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { ; VI-NEXT: v_mad_f32 v0, v0, v1, 1.0 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, 1.0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: 
v_mov_b32_e32 v2, 1.0 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 1.0 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0) @@ -513,19 +688,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { } define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 { -; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0.15915494 -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0.15915494 -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: 
v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: ; VI: ; %bb.0: @@ -535,11 +710,34 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 ; VI-NEXT: v_mad_f32 v0, v0, v1, 0.15915494 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0.15915494 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0.15915494 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e22f983 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = 
fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000) @@ -553,33 +751,65 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 ; f32 1/2pi = 0x3e22f983 define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 { -; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0x3e230000 -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0x3e230000 -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: s_setpc_b64 
s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x3e230000 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x3e230000 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x3e230000 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e230000 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2 
= fpext half 0xH3118 to float @@ -589,33 +819,65 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { -; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0x367c0000 -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0x367c0000 -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x367c0000 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x367c0000 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x367c0000 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x367c0000 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x367c0000 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x367c0000 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2 = fpext half 0xH003F to float @@ -624,49 +886,89 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { } define <2 x float> 
@v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 { -; GFX900-LABEL: v_mad_mix_v2f32_f32imm1: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 1.0 -; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_v2f32_f32imm1: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 1.0 -; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: v_mov_b32_e32 v1, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_v2f32_f32imm1: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_mad_f32 v0, v0, v3, 1.0 -; VI-NEXT: v_mad_f32 v1, v2, v1, 1.0 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_v2f32_f32imm1: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; CI-NEXT: v_mad_f32 v0, v0, v2, 1.0 -; CI-NEXT: v_mad_f32 v1, v1, v3, 1.0 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, 
v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 1.0 +; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 1.0 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0 +; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: s_mov_b32 s4, 1.0 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, 
v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: s_mov_b32 s4, 1.0 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 1.0 +; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 1.0 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0 +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> ) @@ -674,51 +976,93 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) } define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { -; GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: -; GFX900: ; %bb.0: -; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0x3e230000 -; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0x3e230000 -; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: v_mov_b32_e32 v1, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_mov_b32_e32 v1, 0x3e230000 -; VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000 -; VI-NEXT: v_mac_f32_e32 v1, v2, v4 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v1 -; CI-NEXT: v_mov_b32_e32 v1, 0x3e230000 -; CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000 -; CI-NEXT: v_mac_f32_e32 v1, v4, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000 +; 
SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000 +; SDAG-VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000 +; SDAG-VI-NEXT: v_mac_f32_e32 v1, v2, v4 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000 +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000 +; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: s_mov_b32 s4, 0x3e230000 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: s_mov_b32 s4, 0x3e230000 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: s_mov_b32 s4, 0x3e230000 +; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, s4 +; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, s4 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: s_mov_b32 s4, 0x3e230000 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, s4 +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, s4 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2 = fpext <2 x half> to <2 x float> @@ -727,50 +1071,91 @@ define <2 x float> 
@v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> } define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { -; GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi: -; GFX900: ; %bb.0: -; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_mov_b32 s4, 0.15915494 -; GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX900-NEXT: v_mov_b32_e32 v1, v2 -; GFX900-NEXT: s_setpc_b64 s[30:31] -; -; GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi: -; GFX906: ; %bb.0: -; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: s_mov_b32 s4, 0.15915494 -; GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] -; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] -; GFX906-NEXT: v_mov_b32_e32 v1, v2 -; GFX906-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v3, v1 -; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494 -; VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_v2f32_f32imminv2pi: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 -; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 -; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 -; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 -; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 -; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_cvt_f32_f16_e32 v4, v1 -; CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983 -; CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983 -; CI-NEXT: v_mac_f32_e32 v1, v4, v3 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-GFX900-LABEL: 
v_mad_mix_v2f32_f32imminv2pi: +; SDAG-GFX900: ; %bb.0: +; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494 +; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; SDAG-GFX906: ; %bb.0: +; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494 +; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] +; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494 +; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1 +; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983 +; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983 +; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3 +; SDAG-CI-NEXT: s_setpc_b64 
s[30:31] +; +; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-GFX900: ; %bb.0: +; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX900-NEXT: s_mov_b32 s4, 0.15915494 +; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-GFX906: ; %bb.0: +; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-GFX906-NEXT: s_mov_b32 s4, 0.15915494 +; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] +; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 +; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 0.15915494 +; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 0.15915494 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GISEL-CI-NEXT: s_mov_b32 s4, 0x3e22f983 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, s4 +; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, s4 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext <2 x half> %src0 to <2 x float> %src1.ext = fpext <2 x half> %src1 to <2 x float> %src2 = 
fpext <2 x half> to <2 x float> @@ -800,11 +1185,20 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v3 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v5 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.hi = extractelement <2 x half> %src0, i32 1 %src1.hi = extractelement <2 x half> %src1, i32 1 %src2.hi = extractelement <2 x half> %src2, i32 1 @@ -830,11 +1224,17 @@ define float @no_mix_simple(float %src0, float %src1, float %src2) #0 { ; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; CIVI-LABEL: no_mix_simple: -; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CIVI-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: no_mix_simple: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_mad_f32 v0, v0, v1, v2 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: no_mix_simple: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; CI-NEXT: s_setpc_b64 s[30:31] %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2) ret float %result } @@ -852,11 +1252,17 @@ define float @no_mix_simple_fabs(float %src0, 
float %src1, float %src2) #0 { ; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; CIVI-LABEL: no_mix_simple_fabs: -; CIVI: ; %bb.0: -; CIVI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CIVI-NEXT: v_mad_f32 v0, |v0|, v1, v2 -; CIVI-NEXT: s_setpc_b64 s[30:31] +; VI-LABEL: no_mix_simple_fabs: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; VI-NEXT: s_setpc_b64 s[30:31] +; +; CI-LABEL: no_mix_simple_fabs: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; CI-NEXT: s_setpc_b64 s[30:31] %src0.fabs = call float @llvm.fabs.f32(float %src0) %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2) ret float %result @@ -892,11 +1298,20 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %sr ; VI-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_fma_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -928,11 +1343,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, fl ; VI-NEXT: v_add_f32_e32 v0, 
v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_fma_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) @@ -970,12 +1393,22 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, ; VI-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mul_f32_e32 v0, v0, v1 -; CI-NEXT: v_add_f32_e32 v0, v0, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2 +; 
GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -1012,12 +1445,21 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half ; VI-NEXT: v_add_f32_e32 v0, v0, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mul_f32_e32 v0, v0, v1 -; CI-NEXT: v_add_f32_e32 v0, v0, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1 +; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %mul = fmul float %src0.ext, %src1.ext @@ -1047,11 +1489,20 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, hal ; VI-NEXT: v_mac_f32_e32 v0, v3, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: +; 
GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %src2.ext = fpext half %src2 to float @@ -1081,11 +1532,19 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src ; VI-NEXT: v_mad_f32 v0, v0, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.ext = fpext half %src0 to float %src1.ext = fpext half %src1 to float %mul = fmul contract float %src0.ext, %src1.ext @@ -1106,21 +1565,39 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1 ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: 
v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_cvt_f32_f16_e32 v0, v0 -; CI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 +; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %src0 = extractelement <2 x half> %src0.arg.bc, i32 0 %src0.neg = fneg half %src0 @@ -1160,13 +1637,23 @@ define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half % ; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: 
v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 %src0.neg = fneg half %src0 @@ -1200,13 +1687,23 @@ define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1 ; VI-NEXT: v_mac_f32_e32 v0, v3, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; -; CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, |v0| +; 
GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 %src0.abs = call half @llvm.fabs.f16(half %src0) @@ -1230,22 +1727,43 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half ; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, -v0 -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0 +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; 
GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %fneg = fneg <2 x half> %src0.arg.bc %src0 = extractelement <2 x half> %fneg, i32 1 @@ -1269,22 +1787,43 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half ; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v0, v2 -; VI-NEXT: v_mac_f32_e32 v0, v3, v1 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: 
v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; SDAG-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) %src0 = extractelement <2 x half> %fabs, i32 1 @@ -1308,22 +1847,43 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, ; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] ; GFX906-NEXT: s_setpc_b64 s[30:31] ; -; VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 -; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 -; VI-NEXT: v_mad_f32 v0, -v0, v1, v2 -; VI-NEXT: s_setpc_b64 s[30:31] -; -; CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: -; CI: ; %bb.0: -; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0| -; CI-NEXT: v_mad_f32 v0, v0, v1, v2 -; CI-NEXT: s_setpc_b64 s[30:31] +; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; SDAG-VI: ; %bb.0: +; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 +; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 +; SDAG-VI-NEXT: s_setpc_b64 s[30:31] +; +; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; SDAG-CI: ; %bb.0: +; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0| +; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 +; SDAG-CI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; GISEL-VI: ; %bb.0: +; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-VI-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-VI-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; GISEL-CI: ; %bb.0: +; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-CI-NEXT: v_or_b32_e32 v0, 0x80008000, v0 +; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 +; 
GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 +; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 +; GISEL-CI-NEXT: s_setpc_b64 s[30:31] %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) %fneg.fabs = fneg <2 x half> %fabs diff --git a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir index 1403f9bd1cf0d..ae2c77ca87039 100644 --- a/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir +++ b/llvm/test/CodeGen/AMDGPU/opt-exec-masking-pre-ra-update-liveness.mir @@ -686,3 +686,106 @@ body: | bb.3: ... + +# This was trying to extend the liverange of %0 farther than needed, +# following %1's segment to %bb3 + +--- +name: cannot_create_empty_or_backwards_segment +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cannot_create_empty_or_backwards_segment + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec + ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[COPY]], implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]] + bb.0: + liveins: $sgpr4_sgpr5 + + %0:sreg_64_xexec = COPY $sgpr4_sgpr5 + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec + %2:sreg_64_xexec = V_CMP_NE_U32_e64 
1, %1, implicit $exec + $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc + + bb.1: + S_CBRANCH_VCCNZ %bb.3, implicit killed undef $vcc + + bb.2: + S_ENDPGM 0 + + bb.3: + S_ENDPGM 0, implicit %1 +... + +--- +name: cannot_create_empty_or_backwards_segment_2 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: cannot_create_empty_or_backwards_segment_2 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec + ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, [[COPY]], implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit-def dead [[V_CNDMASK_B32_e64_]], implicit [[V_CNDMASK_B32_e64_]] + ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr4_sgpr5 + + bb.1: + liveins: $sgpr4_sgpr5 + + %0:sreg_64_xexec = COPY $sgpr4_sgpr5 + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec + %2:sreg_64_xexec = V_CMP_NE_U32_e64 1, %1, implicit $exec + $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc + + bb.2: + liveins: $sgpr4_sgpr5 + S_NOP 0, implicit-def %1, implicit %1 + S_CBRANCH_VCCNZ %bb.4, implicit killed undef $vcc + S_BRANCH %bb.1 + + bb.3: + S_ENDPGM 0 + + bb.4: + 
S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll index d9d511f72ae8e..aa3fff44b9f3e 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll @@ -69,8 +69,8 @@ declare i64 @llvm.amdgcn.s.getpc() #0 attributes #0 = { nounwind readnone speculatable willreturn } ;. -; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn } +; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ;. ; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" } -; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nounwind readnone speculatable willreturn } +; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nounwind speculatable willreturn memory(none) } ;. diff --git a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll index 1ddc41feb0069..3d2f1b4fb9f4f 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll @@ -505,36 +505,36 @@ entry: ret void } -; FUNC-LABEL: {{^}}atomic_inc_add -; EG: MEM_RAT ATOMIC_INC_UINT -define amdgpu_kernel void @atomic_inc_add(i32 addrspace(1)* %out) { +; FUNC-LABEL: {{^}}atomic_add_1 +; EG: MEM_RAT ATOMIC_ADD +define amdgpu_kernel void @atomic_add_1(i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 1 seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_dec_add -; EG: MEM_RAT ATOMIC_DEC_UINT -define amdgpu_kernel void @atomic_dec_add(i32 addrspace(1)* %out) { +; FUNC-LABEL: {{^}}atomic_add_neg1 +; EG: MEM_RAT ATOMIC_ADD +define amdgpu_kernel void @atomic_add_neg1(i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 -1 seq_cst ret void } -; 
FUNC-LABEL: {{^}}atomic_inc_sub -; EG: MEM_RAT ATOMIC_INC_UINT -define amdgpu_kernel void @atomic_inc_sub(i32 addrspace(1)* %out) { +; FUNC-LABEL: {{^}}atomic_sub_neg1 +; EG: MEM_RAT ATOMIC_SUB +define amdgpu_kernel void @atomic_sub_neg1(i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 -1 seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_dec_sub -; EG: MEM_RAT ATOMIC_DEC_UINT -define amdgpu_kernel void @atomic_dec_sub(i32 addrspace(1)* %out) { +; FUNC-LABEL: {{^}}atomic_sub_1 +; EG: MEM_RAT ATOMIC_SUB +define amdgpu_kernel void @atomic_sub_1(i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 1 seq_cst diff --git a/llvm/test/CodeGen/AMDGPU/read_register.ll b/llvm/test/CodeGen/AMDGPU/read_register.ll index 8fd2fb05cea22..8e8fc44bf57df 100644 --- a/llvm/test/CodeGen/AMDGPU/read_register.ll +++ b/llvm/test/CodeGen/AMDGPU/read_register.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s declare i32 @llvm.read_register.i32(metadata) #0 declare i64 @llvm.read_register.i64(metadata) #0 diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll index f339bd86ea5dd..79ba68b339091 100644 --- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll +++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll @@ -1,8 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck 
-enable-var-scope -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MEMTIME -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -enable-var-scope -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s +; -global-isel=1 SI run line skipped since store not yet implemented. +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=SIVI -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=MEMTIME -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN 
%s declare i64 @llvm.readcyclecounter() #0 diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll index cee224bfa3899..97c07a3d7773f 100644 --- a/llvm/test/CodeGen/AMDGPU/ret.ll +++ b/llvm/test/CodeGen/AMDGPU/ret.ll @@ -1,5 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}vgpr: ; GCN-DAG: v_mov_b32_e32 v1, v0 diff --git a/llvm/test/CodeGen/AMDGPU/select-undef.ll b/llvm/test/CodeGen/AMDGPU/select-undef.ll index f02cd3fc5e4e6..81deec1e0dbb8 100644 --- a/llvm/test/CodeGen/AMDGPU/select-undef.ll +++ b/llvm/test/CodeGen/AMDGPU/select-undef.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -amdgpu-scalar-ir-passes=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}select_undef_lhs: ; GCN: s_waitcnt @@ -6,8 +6,7 @@ ; GCN-NOT: v_cndmask ; GCN-NEXT: s_setpc_b64 define float @select_undef_lhs(float %val, i1 %cond) { - %undef = call float @llvm.amdgcn.rcp.f32(float undef) - %sel = select i1 %cond, float %undef, float %val + %sel = select i1 %cond, float undef, float %val ret float %sel } @@ -17,8 +16,7 @@ define float @select_undef_lhs(float %val, i1 %cond) { ; GCN-NOT: v_cndmask ; GCN-NEXT: s_setpc_b64 define float @select_undef_rhs(float %val, i1 %cond) { - %undef = call float @llvm.amdgcn.rcp.f32(float undef) - %sel = 
select i1 %cond, float %val, float %undef + %sel = select i1 %cond, float %val, float undef ret float %sel } diff --git a/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll b/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll index 8e9409188daad..563d86daa55cb 100644 --- a/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll +++ b/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll @@ -33,6 +33,7 @@ ENDIF: ; EG-LABEL: {{^}}test_b: ; EG: SET{{[GTEQN]+}}_DX10 +; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) ; EG-NEXT: PRED_ ; EG-NEXT: ALU clause starting define amdgpu_kernel void @test_b(i32 addrspace(1)* %out, float %in) { diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir new file mode 100644 index 0000000000000..4694810379fe0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir @@ -0,0 +1,56 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s + +# After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, we replace the dead frame index in the DBG_VALUE instruction with reg 0. +# Skip looking for frame indices in the debug value instruction for incoming arguments passed via stack. The test would crash otherwise. +# It is safe to skip the fixed stack objects as they will never become the spill objects. 
+ +--- | + define amdgpu_kernel void @test() { ret void } + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "llvm", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4) + !1 = !DILocalVariable(name: "a", scope: !2, file: !4, line: 126, type: !6) + !2 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 1, type: !3, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !5) + !3 = !DISubroutineType(types: !4) + !4 = !{null} + !5 = !{!1} + !6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, align: 32) + !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !8 = !DIExpression() + !9 = !DILocation(line: 10, column: 9, scope: !2) + +... +--- +name: test +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +fixedStack: + - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default } +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + maxKernArgAlign: 4 + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledSGPRs: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + kernargSegmentPtr: { reg: '$sgpr6_sgpr7' } + workGroupIDX: { reg: '$sgpr8' } + privateSegmentWaveByteOffset: { reg: '$sgpr9' } +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0: + ; CHECK: DBG_VALUE $noreg, 0 + bb.0: + renamable $sgpr10 = IMPLICIT_DEF + SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + DBG_VALUE %fixed-stack.0, 0, !1, !8, debug-location !9 + + bb.1: + renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 + S_ENDPGM 0 diff 
--git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index 76e1288bf7ac0..95307c84bf948 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -791,5 +791,5 @@ entry: ; GCN-PRELINK: declare float @_Z11native_sqrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY]] ; GCN-PRELINK: attributes #[[$NOUNWIND]] = { nounwind } -; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind readonly } +; GCN-PRELINK: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) } attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll new file mode 100644 index 0000000000000..9c6c8beed669b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll @@ -0,0 +1,21 @@ +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -stop-before=machine-scheduler < %s | FileCheck -enable-var-scope -check-prefixes=MIR %s + +; Make sure !noalias metadata is passed through from target intrinsics + +; MIR-LABEL: name: ds_append_noalias +; MIR: DS_APPEND {{.*}} :: (load store (s32) on %{{.*}}, !noalias !{{[0-9]+}}, addrspace 3) +define amdgpu_kernel void @ds_append_noalias() { + %lds = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(1)* null + %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false), !noalias !0 + store i32 %val, i32 addrspace(1)* null, align 4 + ret void +} + +declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #0 + +attributes #0 = { argmemonly convergent nounwind willreturn } + +!0 = !{!1} +!1 = distinct !{!1, !2} +!2 = distinct !{!2} diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll index 
64f4064610468..77b8b47d1f0b0 100644 --- a/llvm/test/CodeGen/AMDGPU/trap.ll +++ b/llvm/test/CodeGen/AMDGPU/trap.ll @@ -1,18 +1,27 @@ -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s -; RUN: llc -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-HSA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler 
-verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=1 -mtriple=amdgcn--amdhsa --amdhsa-code-object-version=2 -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s ; enable trap handler feature -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=TRAP-BIT -check-prefix=MESA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=+trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=TRAP-BIT %s ; disable trap handler feature -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s -; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NO-MESA-TRAP -check-prefix=NO-TRAP-BIT -check-prefix=NOMESA-TRAP %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-unknown-mesa3d -mattr=-trap-handler -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING -check-prefix=NO-TRAP-BIT %s -; RUN: llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=0 -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s +; RUN: llc -global-isel=1 -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=GCN-WARNING %s ; GCN-WARNING: warning: :0:0: in function hsa_debugtrap void (i32 addrspace(1)*): debugtrap handler not supported diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll index 9367b4fd47bbb..c28aa55e346f2 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll @@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 { attributes #0 = { nounwind readnone } attributes #1 = { "uniform-work-group-size"="true" } ;. 
-; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } -; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } +; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" 
"amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" } ;. diff --git a/llvm/test/CodeGen/AMDGPU/v_pack.ll b/llvm/test/CodeGen/AMDGPU/v_pack.ll index e17d38cff6332..1fbf9593aceea 100644 --- a/llvm/test/CodeGen/AMDGPU/v_pack.ll +++ b/llvm/test/CodeGen/AMDGPU/v_pack.ll @@ -223,9 +223,7 @@ define amdgpu_kernel void @v_pack_b32.fneg(half addrspace(1)* %in0, half addrspa ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: v_add_f16_e32 v0, 2.0, v1 ; GISEL-NEXT: v_add_f16_e32 v1, 2.0, v2 -; GISEL-NEXT: v_sub_f16_e32 v0, 0x8000, v0 -; GISEL-NEXT: v_sub_f16_e32 v1, 0x8000, v1 -; GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 +; GISEL-NEXT: v_pack_b32_f16 v0, -v0, -v1 ; GISEL-NEXT: ;;#ASMSTART ; GISEL-NEXT: ; use v0 ; GISEL-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir new file mode 100644 index 0000000000000..2058a94b0614a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-fi-skip-processing-stack-arg-dbg-value.mir @@ -0,0 +1,56 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-vgpr-to-agpr=true -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck %s + +# After handling the VGPR spill to AGPR copy, we replace the dead frame index in the DBG_VALUE instruction with reg 0. +# Skip looking for frame indices in the debug value instruction for incoming arguments passed via stack. The test would crash otherwise. +# It is safe to skip the fixed stack objects as they will never become the spill objects. 
+ +--- | + define amdgpu_kernel void @test() { ret void } + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "llvm", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4) + !1 = !DILocalVariable(name: "a", scope: !2, file: !4, line: 126, type: !6) + !2 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 1, type: !3, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !5) + !3 = !DISubroutineType(types: !4) + !4 = !{null} + !5 = !{!1} + !6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, align: 32) + !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !8 = !DIExpression() + !9 = !DILocation(line: 10, column: 9, scope: !2) + +... +--- +name: test +tracksRegLiveness: true +frameInfo: + maxAlignment: 4 +fixedStack: + - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: default } +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + maxKernArgAlign: 4 + isEntryFunction: true + waveLimiter: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' + hasSpilledVGPRs: true + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + dispatchPtr: { reg: '$sgpr4_sgpr5' } + kernargSegmentPtr: { reg: '$sgpr6_sgpr7' } + workGroupIDX: { reg: '$sgpr8' } + privateSegmentWaveByteOffset: { reg: '$sgpr9' } +body: | + ; CHECK-LABEL: name: test + ; CHECK: bb.0: + ; CHECK: DBG_VALUE $noreg, 0 + bb.0: + $vgpr2 = IMPLICIT_DEF + SI_SPILL_V32_SAVE $vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, align 4, addrspace 5) + DBG_VALUE %fixed-stack.0, 0, !1, !8, debug-location !9 + + bb.1: + renamable $vgpr2 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, align 4, addrspace 5) + S_ENDPGM 0 diff --git 
a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 2167a5ab8f42d..7e612f53151c2 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -204,7 +204,7 @@ define amdgpu_ps float @test5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -219,7 +219,7 @@ define amdgpu_ps float @test5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: @@ -243,7 +243,7 @@ define amdgpu_ps float @test6(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -258,7 +258,7 @@ define amdgpu_ps float @test6(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, 
s2 ; GFX10-W32-NEXT: ; return to shader part epilog main_body: @@ -496,7 +496,7 @@ define amdgpu_ps float @test_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -518,7 +518,7 @@ define amdgpu_ps float @test_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog @@ -962,7 +962,7 @@ define amdgpu_ps float @test_strict_wqm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -986,7 +986,7 @@ define amdgpu_ps float @test_strict_wqm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; return to shader part epilog @@ -1176,7 +1176,7 @@ define 
amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_nop 0 ; GFX9-W64-NEXT: buffer_load_dword v2, v2, s[0:3], 0 idxen ; GFX9-W64-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $scc killed $exec -; GFX9-W64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: s_waitcnt vmcnt(0) ; GFX9-W64-NEXT: v_add_u32_e32 v1, v2, v1 @@ -1193,7 +1193,7 @@ define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: buffer_load_dword v2, v0, s[0:3], 0 idxen ; GFX10-W32-NEXT: buffer_load_dword v1, v1, s[0:3], 0 idxen ; GFX10-W32-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $scc killed $exec -; GFX10-W32-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt vmcnt(0) ; GFX10-W32-NEXT: v_add_nc_u32_e32 v1, v1, v2 @@ -2500,7 +2500,7 @@ define amdgpu_ps float @test_strict_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX9-W64-NEXT: s_wqm_b64 exec, exec ; GFX9-W64-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-W64-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX9-W64-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX9-W64-NEXT: s_and_b64 exec, exec, s[2:3] ; GFX9-W64-NEXT: ; return to shader part epilog ; @@ -2522,7 +2522,7 @@ define amdgpu_ps float @test_strict_wwm5(i32 inreg %idx0, i32 inreg %idx1) { ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo ; GFX10-W32-NEXT: v_mov_b32_e32 v0, v1 ; GFX10-W32-NEXT: v_add_f32_e32 v0, v0, v0 -; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec killed $exec +; GFX10-W32-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $exec ; GFX10-W32-NEXT: s_and_b32 exec_lo, exec_lo, s2 ; GFX10-W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-W32-NEXT: ; 
return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/write_register.ll b/llvm/test/CodeGen/AMDGPU/write_register.ll index eb9b103775a81..d385425c3e291 100644 --- a/llvm/test/CodeGen/AMDGPU/write_register.ll +++ b/llvm/test/CodeGen/AMDGPU/write_register.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s declare void @llvm.write_register.i32(metadata, i32) #0 declare void @llvm.write_register.i64(metadata, i64) #0 diff --git a/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll index 24469cc3717e9..f0b7141b5c7a2 100644 --- a/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll +++ b/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s -check-prefix=ARM -; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB +; RUN: llc < %s -mtriple=armv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-atomic-cfg-tidy=0 | FileCheck %s -check-prefix=THUMB ; rdar://7998649 %struct.foo = type { i64, i64 } diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll index ab5f58c27e768..14565a7172827 100644 --- a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll +++ b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll @@ -80,3 +80,41 @@ return: %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ] ret i8* %retval.0 } + +; The cmp of %val should not be hoisted above the preceding conditional branch +define void @f4(i32** %ptr1, i64* %ptr2, i64 %val) { +entry: +; CHECK-LABEL: f4: +; CHECK: cmp +; CHECK: movne +; CHECK: strne +; CHECK: orrs +; 
CHECK-NOT: subs +; CHECK-NOT: sbcs +; CHECK: beq + %tobool.not = icmp eq i32** %ptr1, null + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + store i32* null, i32** %ptr1, align 4 + br label %if.end + +if.end: +; CHECK: subs +; CHECK: sbcs +; CHECK: bxlt lr + %tobool1 = icmp ne i64 %val, 0 + %cmp = icmp slt i64 %val, 10 + %or.cond = and i1 %tobool1, %cmp + br i1 %or.cond, label %cleanup, label %if.end3 + +if.end3: +; CHECK: subs +; CHECK: sbc + %sub = add nsw i64 %val, -10 + store i64 %sub, i64* %ptr2, align 8 + br label %cleanup + +cleanup: + ret void +} diff --git a/llvm/test/CodeGen/ARM/vcmpz.ll b/llvm/test/CodeGen/ARM/vcmpz.ll index f800346a6b564..51b5d28d8192e 100644 --- a/llvm/test/CodeGen/ARM/vcmpz.ll +++ b/llvm/test/CodeGen/ARM/vcmpz.ll @@ -174,11 +174,16 @@ define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ult(<4 x i32> %0) { ret <4 x i32> %3 } -;define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ule(<4 x i32> %0) { -; %2 = icmp ule <4 x i32> %0, zeroinitializer -; %3 = sext <4 x i1> %2 to <4 x i32> -; ret <4 x i32> %3 -;} +define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ule(<4 x i32> %0) { +; CHECK-LABEL: vcmpz_zr_ule: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vcge.u32 q0, q8, q0 +; CHECK-NEXT: bx lr + %2 = icmp ule <4 x i32> %0, zeroinitializer + %3 = sext <4 x i1> %2 to <4 x i32> + ret <4 x i32> %3 +} define arm_aapcs_vfpcc <4 x i32> @vcmpz_zr_ugt(<4 x i32> %0) { ; CHECK-LABEL: vcmpz_zr_ugt: @@ -294,8 +299,13 @@ define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_ugt(<4 x i32> %0) { ret <4 x i32> %3 } -;define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_uge(<4 x i32> %0) { -; %2 = icmp uge <4 x i32> zeroinitializer, %0 -; %3 = sext <4 x i1> %2 to <4 x i32> -; ret <4 x i32> %3 -;} +define arm_aapcs_vfpcc <4 x i32> @vcmpz_zl_uge(<4 x i32> %0) { +; CHECK-LABEL: vcmpz_zl_uge: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q8, #0x0 +; CHECK-NEXT: vcge.u32 q0, q8, q0 +; CHECK-NEXT: bx lr + %2 = icmp uge <4 x i32> zeroinitializer, %0 + %3 = sext <4 x i1> %2 to 
<4 x i32> + ret <4 x i32> %3 +} diff --git a/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir b/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir new file mode 100644 index 0000000000000..a1b0aec4cf81c --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir @@ -0,0 +1,60 @@ +# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s +# REQUIRES: asserts + +# Test that the loop carried dependence check correctly identifies a recurrence +# when load and store use distinct increment for their pointer. To test this, +# we check that we don't have the Rec NodeSet containing SU(5) and SU(7) which +# requires to use a single CHECK-NOT to match such a Rec NodeSet. Fortunately +# the atom '.' does not match a newline but anything else on a line. + +# CHECK-NOT: Rec NodeSet{{.+[[:space:]]}} SU(5){{.+[[:space:]]}} SU(7) + +... +--- +name: test +tracksRegLiveness: true + +body: | + bb.0: + successors: %bb.3, %bb.1 + liveins: $r0, $r1, $r2 + + %14:intregs = COPY $r2 + %13:intregs = COPY $r1 + %12:intregs = COPY $r0 + %16:predregs = C2_cmpeqi %14, 2 + %15:intregs = A2_tfrsi 0 + J2_jumpt killed %16, %bb.3, implicit-def dead $pc + J2_jump %bb.1, implicit-def dead $pc + + bb.1: + successors: %bb.2 + + %0:intregs = A2_addi %14, -2 + %1:intregs = A2_addi %12, 10 + %2:intregs = A2_addi %13, 4 + %17:intregs = A2_tfrsi 0 + %23:intregs = COPY %0 + J2_loop0r %bb.2, %23, implicit-def $lc0, implicit-def $sa0, implicit-def $usr + + bb.2 (machine-block-address-taken): + successors: %bb.3, %bb.2 + + %3:intregs = PHI %2, %bb.1, %10, %bb.2 + %4:intregs = PHI %1, %bb.1, %9, %bb.2 + %21:intregs = PHI %1, %bb.1, %22, %bb.2 + %6:intregs = PHI %17, %bb.1, %7, %bb.2 + %18:intregs, %10:intregs = L2_loadrh_pi %3, 2 :: (load (s16)) + %19:intregs, %22:intregs = L2_loadrh_pi %21, 2 :: (load (s16)) + %20:intregs = A2_addi %18, 10 + %9:intregs = S2_storerh_pi %4, 2, killed %20 :: (store (s16)) + %7:intregs = M2_acci %19, %6, 
%18 + ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0 + J2_jump %bb.3, implicit-def dead $pc + + bb.3: + %11:intregs = PHI %15, %bb.0, %7, %bb.2 + $r0 = COPY %11 + PS_jmpret $r31, implicit-def dead $pc, implicit $r0 + +... diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll index 070027b698436..03a126a736efc 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/double-convert.ll @@ -242,16 +242,9 @@ define double @convert_u32_to_double(i32 %a) nounwind { ; ; LA64-LABEL: convert_u32_to_double: ; LA64: # %bb.0: -; LA64-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) -; LA64-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) -; LA64-NEXT: fld.d $fa0, $a1, 0 -; LA64-NEXT: lu52i.d $a1, $zero, 1107 -; LA64-NEXT: movgr2fr.d $fa1, $a1 -; LA64-NEXT: fsub.d $fa0, $fa1, $fa0 -; LA64-NEXT: lu12i.w $a1, 275200 -; LA64-NEXT: bstrins.d $a0, $a1, 63, 32 -; LA64-NEXT: movgr2fr.d $fa1, $a0 -; LA64-NEXT: fadd.d $fa0, $fa1, $fa0 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: movgr2fr.d $fa0, $a0 +; LA64-NEXT: ffint.d.l $fa0, $fa0 ; LA64-NEXT: ret %1 = uitofp i32 %a to double ret double %1 diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll index 559206a4db5ac..a18cb6f756a60 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/float-convert.ll @@ -376,14 +376,12 @@ define float @convert_i32_to_float(i32 %a) nounwind { ; ; LA64F-LABEL: convert_i32_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: addi.w $a0, $a0, 0 ; LA64F-NEXT: movgr2fr.w $fa0, $a0 ; LA64F-NEXT: ffint.s.w $fa0, $fa0 ; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_i32_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: addi.w $a0, $a0, 0 ; LA64D-NEXT: movgr2fr.w $fa0, $a0 ; LA64D-NEXT: ffint.s.w $fa0, $fa0 ; 
LA64D-NEXT: ret @@ -412,14 +410,17 @@ define float @convert_i64_to_float(i64 %a) nounwind { ; ; LA64F-LABEL: convert_i64_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: movgr2fr.w $fa0, $a0 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(__floatdisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 ; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_i64_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: movgr2fr.w $fa0, $a0 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 ; LA64D-NEXT: ret %1 = sitofp i64 %a to float ret float %1 @@ -514,34 +515,19 @@ define float @convert_u32_to_float(i32 %a) nounwind { ; ; LA64F-LABEL: convert_u32_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: bstrpick.d $a1, $a0, 31, 1 -; LA64F-NEXT: andi $a2, $a0, 1 -; LA64F-NEXT: or $a1, $a2, $a1 -; LA64F-NEXT: movgr2fr.w $fa0, $a1 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill ; LA64F-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64F-NEXT: slti $a1, $a0, 0 -; LA64F-NEXT: movgr2cf $fcc0, $a1 -; LA64F-NEXT: movgr2fr.w $fa1, $a0 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 -; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +; LA64F-NEXT: bl %plt(__floatundisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 ; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_u32_to_float: ; LA64D: # %bb.0: -; LA64D-NEXT: bstrpick.d $a1, $a0, 31, 1 -; LA64D-NEXT: andi $a2, $a0, 1 -; LA64D-NEXT: or $a1, $a2, $a1 -; LA64D-NEXT: movgr2fr.w $fa0, $a1 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 -; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0 ; LA64D-NEXT: bstrpick.d $a0, $a0, 31, 0 -; LA64D-NEXT: slti $a1, $a0, 0 -; LA64D-NEXT: movgr2cf $fcc0, $a1 -; LA64D-NEXT: movgr2fr.w $fa1, $a0 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 -; LA64D-NEXT: fsel 
$fa0, $fa1, $fa0, $fcc0 +; LA64D-NEXT: movgr2fr.d $fa0, $a0 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 ; LA64D-NEXT: ret %1 = uitofp i32 %a to float ret float %1 @@ -568,17 +554,11 @@ define float @convert_u64_to_float(i64 %a) nounwind { ; ; LA64F-LABEL: convert_u64_to_float: ; LA64F: # %bb.0: -; LA64F-NEXT: srli.d $a1, $a0, 1 -; LA64F-NEXT: andi $a2, $a0, 1 -; LA64F-NEXT: or $a1, $a2, $a1 -; LA64F-NEXT: movgr2fr.w $fa0, $a1 -; LA64F-NEXT: ffint.s.w $fa0, $fa0 -; LA64F-NEXT: fadd.s $fa0, $fa0, $fa0 -; LA64F-NEXT: slti $a1, $a0, 0 -; LA64F-NEXT: movgr2cf $fcc0, $a1 -; LA64F-NEXT: movgr2fr.w $fa1, $a0 -; LA64F-NEXT: ffint.s.w $fa1, $fa1 -; LA64F-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: bl %plt(__floatundisf) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 ; LA64F-NEXT: ret ; ; LA64D-LABEL: convert_u64_to_float: @@ -586,13 +566,13 @@ define float @convert_u64_to_float(i64 %a) nounwind { ; LA64D-NEXT: srli.d $a1, $a0, 1 ; LA64D-NEXT: andi $a2, $a0, 1 ; LA64D-NEXT: or $a1, $a2, $a1 -; LA64D-NEXT: movgr2fr.w $fa0, $a1 -; LA64D-NEXT: ffint.s.w $fa0, $fa0 +; LA64D-NEXT: movgr2fr.d $fa0, $a1 +; LA64D-NEXT: ffint.s.l $fa0, $fa0 ; LA64D-NEXT: fadd.s $fa0, $fa0, $fa0 ; LA64D-NEXT: slti $a1, $a0, 0 ; LA64D-NEXT: movgr2cf $fcc0, $a1 -; LA64D-NEXT: movgr2fr.w $fa1, $a0 -; LA64D-NEXT: ffint.s.w $fa1, $fa1 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.s.l $fa1, $fa1 ; LA64D-NEXT: fsel $fa0, $fa1, $fa0, $fcc0 ; LA64D-NEXT: ret %1 = uitofp i64 %a to float diff --git a/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir b/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir index 78ed554687fa2..46da2af6bdf04 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mir-canon-constant-pool-hash.mir @@ -14,8 +14,8 @@ constants: body: | bb.0: ; Test that we no longer have 
hash collisions between two different consts: - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:gpr64common = ADR - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:gpr64common = ADR + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:gpr64common = ADR + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:gpr64common = ADR %vreg0:gpr64common = ADRP target-flags(aarch64-page) %const.0 %vreg1:gpr64common = ADRP target-flags(aarch64-page) %const.1 ... diff --git a/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir b/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir index 6d3124c61db1b..a5ffd6e4ce3f9 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mir-canon-jump-table.mir @@ -21,10 +21,10 @@ body: | bb.2: bb.3: bb.7: - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.0 - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.1 - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.2 - ;CHECK: %bb{{[0-9]+}}_{{[0-9]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.3 + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.0 + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.1 + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.2 + ;CHECK: %bb{{[0-9a-f]+}}_{{[0-9a-f]+}}__1:_(p0) = G_JUMP_TABLE %jump-table.3 %a:_(p0) = G_JUMP_TABLE %jump-table.0 %b:_(p0) = G_JUMP_TABLE %jump-table.1 %c:_(p0) = G_JUMP_TABLE %jump-table.2 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir index 21a7dddc98591..daf78187c4849 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonCopyCopyProp.mir @@ -40,7 +40,7 @@ body: | %42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) - ;CHECK: %bb0_{{[0-9]+}}__1:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) + ;CHECK: %bb0_{{[0-9a-f]+}}__1:gpr32 = LDRWui %stack.0, 0 :: 
(dereferenceable load (s64)) ;CHECK-NEXT: $w0 = COPY %bb0_ ;CHECK-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir index b30ca7c1c7e3c..63e28498ca532 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir @@ -1,12 +1,12 @@ # RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s # RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -mir-vreg-namer-use-stable-hash -verify-machineinstrs -run-pass mir-canonicalizer %s | FileCheck %s # These Idempotent instructions are sorted alphabetically (based on after the '=') -# CHECK: %bb0_{{[0-9]+}}__1:gpr64 = MOVi64imm 4617315517961601024 -# CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 408 -# CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = MOVi32imm 408 -# CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr64all = IMPLICIT_DEF -# CHECK-NEXT: %bb0_{{[0-9]+}}__1:fpr64 = FMOVDi 20 -# CHECK-NEXT: %bb0_{{[0-9]+}}__1:fpr64 = FMOVDi 112 +# CHECK: %bb0_{{[0-9a-f]+}}__1:gpr64 = MOVi64imm 4617315517961601024 +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 408 +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__2:gpr32 = MOVi32imm 408 +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr64all = IMPLICIT_DEF +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fpr64 = FMOVDi 20 +# CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fpr64 = FMOVDi 112 ... 
--- diff --git a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir index cdb2ecca60274..a3b339f07d502 100644 --- a/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir +++ b/llvm/test/CodeGen/MIR/AArch64/mirnamer.mir @@ -8,9 +8,9 @@ body: | ;CHECK-LABEL: bb.0 ;CHECK-NEXT: liveins ;CHECK-NEXT: {{ $}} - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:_(p0) = COPY $d0 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:_(<4 x s32>) = COPY $q0 - ;CHECK-NEXT: G_STORE %bb0_{{[0-9]+}}__1(<4 x s32>), %bb0_{{[0-9]+}}__1(p0) :: (store (<4 x s32>)) + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:_(p0) = COPY $d0 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:_(<4 x s32>) = COPY $q0 + ;CHECK-NEXT: G_STORE %bb0_{{[0-9a-f]+}}__1(<4 x s32>), %bb0_{{[0-9a-f]+}}__1(p0) :: (store (<4 x s32>)) liveins: $q0, $d0 %1:fpr(p0) = COPY $d0 @@ -28,19 +28,19 @@ body: | bb.0: ;CHECK-LABEL: bb.0 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 1 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 2 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__3:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 3 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = nsw ADDWrr - ;CHECK-NEXT: %bb0_{{[0-9]+}}__4:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = nsw ADDWrr - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 4 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__3:gpr32 = nsw ADDWrr - ;CHECK-NEXT: %bb0_{{[0-9]+}}__5:gpr32 = LDRWui - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = MOVi32imm 5 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = LDRWui + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 1 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__2:gpr32 = LDRWui + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 2 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__3:gpr32 = LDRWui + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 3 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = nsw ADDWrr + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__4:gpr32 = LDRWui + ;CHECK-NEXT: 
%bb0_{{[0-9a-f]+}}__2:gpr32 = nsw ADDWrr + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 4 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__3:gpr32 = nsw ADDWrr + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__5:gpr32 = LDRWui + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = MOVi32imm 5 %0:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %1:gpr32 = MOVi32imm 1 @@ -78,11 +78,11 @@ body: | ;CHECK-LABEL: bb.0: ;CHECK-NEXT: liveins ;CHECK-NEXT: {{ $}} - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = LDRWui %stack.0, 0 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = COPY %bb0_{{[0-9]+}}__1 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__1:gpr32 = COPY %bb0_{{[0-9]+}}__1 - ;CHECK-NEXT: %bb0_{{[0-9]+}}__2:gpr32 = COPY %bb0_{{[0-9]+}}__1 - ;CHECK-NEXT: $w0 = COPY %bb0_{{[0-9]+}}__2 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = LDRWui %stack.0, 0 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = COPY %bb0_{{[0-9a-f]+}}__1 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:gpr32 = COPY %bb0_{{[0-9a-f]+}}__1 + ;CHECK-NEXT: %bb0_{{[0-9a-f]+}}__2:gpr32 = COPY %bb0_{{[0-9a-f]+}}__1 + ;CHECK-NEXT: $w0 = COPY %bb0_{{[0-9a-f]+}}__2 %0:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load (s64)) %1:gpr32 = COPY %0 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir index 785cd20d31968..fb1728d9021b7 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir @@ -8,18 +8,18 @@ name: foo body: | bb.0: ; CHECK-LABEL: name: foo - ; CHECK: %bb0_{{[0-9]+}}__1:sreg_32_xm0 = S_MOV_B32 61440 - ; CHECK: %bb0_{{[0-9]+}}__1:sreg_32_xm0 = S_MOV_B32 0 - ; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY $vgpr0 - ; CHECK: %bb0_{{[0-9]+}}__1:sgpr_64 = COPY $sgpr0_sgpr1 - ; CHECK: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9]+}}__1, 9, 0 - ; CHECK: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9]+}}__1, 11, 0 - ; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY %bb0_{{[0-9]+}}__1 - ; CHECK: %bb0_{{[0-9]+}}__1:vgpr_32 = COPY 
%bb0_{{[0-9]+}}__1 - ; CHECK: %bb0_{{[0-9]+}}__2:vgpr_32 = COPY %bb0_{{[0-9]+}}__1 - ; CHECK: %bb0_{{[0-9]+}}__1:vreg_64 = REG_SEQUENCE %bb0_{{[0-9]+}}__1, %subreg.sub0, %bb0_{{[0-9]+}}__1, %subreg.sub1 - ; CHECK: %bb0_{{[0-9]+}}__1:sgpr_128 = REG_SEQUENCE %bb0_{{[0-9]+}}__1, %subreg.sub0, %bb0_{{[0-9]+}}__1, %subreg.sub1, %bb0_{{[0-9]+}}__1, %subreg.sub2, %bb0_{{[0-9]+}}__2, %subreg.sub3 - ; CHECK: BUFFER_STORE_DWORD_ADDR64 %bb0_{{[0-9]+}}__1, %bb0_{{[0-9]+}}__1, %bb0_{{[0-9]+}}__1, 0, 0, 0, 0, 0, implicit $exec + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sreg_32_xm0 = S_MOV_B32 61440 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sreg_32_xm0 = S_MOV_B32 0 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:vgpr_32 = COPY $vgpr0 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sgpr_64 = COPY $sgpr0_sgpr1 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9a-f]+}}__1, 9, 0 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %bb0_{{[0-9a-f]+}}__1, 11, 0 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:vgpr_32 = COPY %bb0_{{[0-9a-f]+}}__1 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:vgpr_32 = COPY %bb0_{{[0-9a-f]+}}__1 + ; CHECK: %bb0_{{[0-9a-f]+}}__2:vgpr_32 = COPY %bb0_{{[0-9a-f]+}}__1 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:vreg_64 = REG_SEQUENCE %bb0_{{[0-9a-f]+}}__1, %subreg.sub0, %bb0_{{[0-9a-f]+}}__1, %subreg.sub1 + ; CHECK: %bb0_{{[0-9a-f]+}}__1:sgpr_128 = REG_SEQUENCE %bb0_{{[0-9a-f]+}}__1, %subreg.sub0, %bb0_{{[0-9a-f]+}}__1, %subreg.sub1, %bb0_{{[0-9a-f]+}}__1, %subreg.sub2, %bb0_{{[0-9a-f]+}}__2, %subreg.sub3 + ; CHECK: BUFFER_STORE_DWORD_ADDR64 %bb0_{{[0-9a-f]+}}__1, %bb0_{{[0-9a-f]+}}__1, %bb0_{{[0-9a-f]+}}__1, 0, 0, 0, 0, 0, implicit $exec ; CHECK: S_ENDPGM 0 %10:sreg_32_xm0 = S_MOV_B32 61440 %11:sreg_32_xm0 = S_MOV_B32 0 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir index e5d80e9c59fcd..99a905a1a7306 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/mircanon-memoperands.mir 
@@ -25,12 +25,12 @@ body: | liveins: $sgpr4_sgpr5 ; CHECK: COPY - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0 = COPY $sgpr4_sgpr5 %1 = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (non-temporal dereferenceable invariant load (s64) from `i64 addrspace(4)* undef`) diff --git a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir index ebd29f917ffb6..32dc9e8752d8a 100644 --- a/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir +++ b/llvm/test/CodeGen/MIR/X86/mir-canon-hash-bb.mir @@ -40,7 +40,7 @@ body: | G_BR %bb.2 ; CHECK: bb.1: - ; CHECK: %bb2_{{[0-9]+}}__1:_(s32) = G_CONSTANT + ; CHECK: %bb2_{{[0-9a-f]+}}__1:_(s32) = G_CONSTANT bb.1: %tmp4:_(s32) = G_CONSTANT i32 1 G_STORE %tmp4(s32), %tmp6(p0) :: (store (s32) into %ir.tmp1) @@ -48,13 +48,13 @@ body: | ; CHECK: bb.2: - ; CHECK: %bb1_{{[0-9]+}}__1:_(s32) = G_CONSTANT + ; CHECK: %bb1_{{[0-9a-f]+}}__1:_(s32) = G_CONSTANT bb.2: %tmp3:_(s32) = G_CONSTANT i32 2 G_STORE %tmp3(s32), %tmp6(p0) :: (store (s32) into %ir.tmp1) ; CHECK: bb.3: - ; CHECK: %bb3_{{[0-9]+}}__1:_(s32) = G_LOAD + ; CHECK: %bb3_{{[0-9a-f]+}}__1:_(s32) = G_LOAD bb.3: %tmp9:_(s32) = G_LOAD %tmp6(p0) :: (load (s32) from 
%ir.tmp1) $eax = COPY %tmp9(s32) diff --git a/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir b/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir index bc5991ea41b5f..6b7b577f8ca54 100644 --- a/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir +++ b/llvm/test/CodeGen/MIR/X86/mircanon-flags.mir @@ -12,15 +12,15 @@ body: | bb.0: ; CHECK: COPY - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = nnan VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = ninf VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = nsz VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = arcp VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = contract VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = afn VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = reassoc VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = nsz arcp contract afn reassoc VMULSSrr - ; CHECK-NEXT: %bb0_{{[0-9]+}}__1:fr32 = contract afn reassoc VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = nnan VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = ninf VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = nsz VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = arcp VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = contract VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = afn VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = reassoc VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = nsz arcp contract afn reassoc VMULSSrr + ; CHECK-NEXT: %bb0_{{[0-9a-f]+}}__1:fr32 = contract afn reassoc VMULSSrr %0:fr32 = COPY $xmm0 %1:fr32 = nnan VMULSSrr %0, %0, implicit $mxcsr diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll index af026593c1ee3..190d57a07b3cf 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-double.ll @@ -699,5 +699,5 @@ entry: ; LARGE64-NEXT: .tc GInit[TE],GInit[RW] -attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll index 37cba2a90c4ad..34ebe758343e5 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-int.ll @@ -714,5 +714,5 @@ entry: ; LARGE64-LABEL: L..C8: ; LARGE64-NEXT: .tc GInit[TE],GInit[RW] -attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll index 098f3ae4afb21..34e83221c4452 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-gd-longlong.ll @@ -754,5 +754,5 @@ entry: ; LARGE64-LABEL: L..C8: ; LARGE64-NEXT: .tc GInit[TE],GInit[RW] -attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree 
norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll index dbdf84fa68ef9..423719ec04c8f 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc-large.ll @@ -610,5 +610,5 @@ entry: ; DIS-NEXT: 8: 3f f0 00 00 ; DIS-NEXT: c: 00 00 00 00 -attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" 
"target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll index 108f9758d055f..2550904e65fec 100644 --- a/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll +++ b/llvm/test/CodeGen/PowerPC/aix-tls-xcoff-reloc.ll @@ -640,5 +640,5 @@ entry: ; DIS: 00000004 (idx: 37) TIUninit[UL]: ; DIS-NEXT: ... -attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } -attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-rop-protection,-spe,-vsx" } +attributes #0 = { nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } +attributes #1 = { norecurse nounwind readonly willreturn "frame-pointer"="none" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="pwr4" "target-features"="-altivec,-bpermd,-crypto,-direct-move,-extdiv,-float128,-htm,-mma,-paired-vector-memops,-power10-vector,-power8-vector,-power9-vector,-spe,-vsx" } diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll index de0441f279971..3c9f39ff76682 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll @@ -1,11 +1,91 @@ -; RUN: llc < %s -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -; RUN: llc < %s -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -check-prefix=PWR9 +; RUN: llc < %s -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s -check-prefix=PWR8 @a = internal global fp128 0xL00000000000000000000000000000000, align 16 @x = internal global [4 x fp128] zeroinitializer, align 16 @y = internal global [4 x fp128] zeroinitializer, align 16 -define void @fmul_ctrloop_fp128() { +define void @fmul_ctrloop_fp128() nounwind { +; PWR9-LABEL: fmul_ctrloop_fp128: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT: addis 5, 2, a@toc@ha +; PWR9-NEXT: addis 3, 2, y@toc@ha +; PWR9-NEXT: addis 4, 2, x@toc@ha +; PWR9-NEXT: addi 5, 5, a@toc@l +; PWR9-NEXT: addi 3, 3, y@toc@l +; PWR9-NEXT: addi 4, 4, x@toc@l +; PWR9-NEXT: lxv 34, 0(5) +; PWR9-NEXT: addi 3, 3, -16 +; PWR9-NEXT: addi 4, 4, -16 +; PWR9-NEXT: li 5, 0 +; PWR9-NEXT: .p2align 5 +; PWR9-NEXT: .LBB0_1: # %for.body +; PWR9-NEXT: # +; PWR9-NEXT: lxv 35, 16(4) +; PWR9-NEXT: addi 5, 5, 16 +; PWR9-NEXT: addi 4, 4, 16 +; PWR9-NEXT: cmpldi 5, 64 +; PWR9-NEXT: xsmulqp 3, 2, 3 +; PWR9-NEXT: stxv 35, 16(3) +; PWR9-NEXT: addi 3, 3, 16 +; PWR9-NEXT: bne 0, .LBB0_1 +; PWR9-NEXT: # %bb.2: # %for.end +; 
PWR9-NEXT: blr +; +; PWR8-LABEL: fmul_ctrloop_fp128: +; PWR8: # %bb.0: # %entry +; PWR8-NEXT: mflr 0 +; PWR8-NEXT: std 0, 16(1) +; PWR8-NEXT: stdu 1, -112(1) +; PWR8-NEXT: li 3, 48 +; PWR8-NEXT: addis 4, 2, x@toc@ha +; PWR8-NEXT: std 28, 80(1) # 8-byte Folded Spill +; PWR8-NEXT: std 29, 88(1) # 8-byte Folded Spill +; PWR8-NEXT: std 30, 96(1) # 8-byte Folded Spill +; PWR8-NEXT: li 30, 0 +; PWR8-NEXT: li 29, 16 +; PWR8-NEXT: addi 4, 4, x@toc@l +; PWR8-NEXT: std 26, 64(1) # 8-byte Folded Spill +; PWR8-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill +; PWR8-NEXT: addis 3, 2, a@toc@ha +; PWR8-NEXT: std 27, 72(1) # 8-byte Folded Spill +; PWR8-NEXT: addi 3, 3, a@toc@l +; PWR8-NEXT: lxvd2x 0, 0, 3 +; PWR8-NEXT: addis 3, 2, y@toc@ha +; PWR8-NEXT: addi 3, 3, y@toc@l +; PWR8-NEXT: addi 28, 3, -16 +; PWR8-NEXT: addi 3, 4, -16 +; PWR8-NEXT: xxswapd 63, 0 +; PWR8-NEXT: .p2align 4 +; PWR8-NEXT: .LBB0_1: # %for.body +; PWR8-NEXT: # +; PWR8-NEXT: lxvd2x 0, 3, 29 +; PWR8-NEXT: vmr 2, 31 +; PWR8-NEXT: addi 27, 28, 16 +; PWR8-NEXT: addi 26, 3, 16 +; PWR8-NEXT: xxswapd 35, 0 +; PWR8-NEXT: bl __mulkf3 +; PWR8-NEXT: nop +; PWR8-NEXT: xxswapd 0, 34 +; PWR8-NEXT: addi 30, 30, 16 +; PWR8-NEXT: mr 3, 26 +; PWR8-NEXT: cmpldi 30, 64 +; PWR8-NEXT: stxvd2x 0, 28, 29 +; PWR8-NEXT: mr 28, 27 +; PWR8-NEXT: bne 0, .LBB0_1 +; PWR8-NEXT: # %bb.2: # %for.end +; PWR8-NEXT: li 3, 48 +; PWR8-NEXT: ld 30, 96(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 29, 88(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 28, 80(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 27, 72(1) # 8-byte Folded Reload +; PWR8-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload +; PWR8-NEXT: ld 26, 64(1) # 8-byte Folded Reload +; PWR8-NEXT: addi 1, 1, 112 +; PWR8-NEXT: ld 0, 16(1) +; PWR8-NEXT: mtlr 0 +; PWR8-NEXT: blr entry: %0 = load fp128, ptr @a, align 16 br label %for.body @@ -23,12 +103,63 @@ for.body: ; preds = %for.body, %entry for.end: ; preds = %for.body ret void - -; CHECK-LABEL: fmul_ctrloop_fp128 -; CHECK-NOT: mtctr } -define void 
@fpext_ctrloop_fp128(ptr %a) { +define void @fpext_ctrloop_fp128(ptr %a) nounwind { +; PWR9-LABEL: fpext_ctrloop_fp128: +; PWR9: # %bb.0: # %entry +; PWR9-NEXT: addis 4, 2, y@toc@ha +; PWR9-NEXT: addi 3, 3, -8 +; PWR9-NEXT: addi 4, 4, y@toc@l +; PWR9-NEXT: addi 5, 4, -16 +; PWR9-NEXT: li 4, 0 +; PWR9-NEXT: .p2align 5 +; PWR9-NEXT: .LBB1_1: # %for.body +; PWR9-NEXT: # +; PWR9-NEXT: lfdu 0, 8(3) +; PWR9-NEXT: addi 4, 4, 8 +; PWR9-NEXT: cmpldi 4, 32 +; PWR9-NEXT: xscpsgndp 34, 0, 0 +; PWR9-NEXT: xscvdpqp 2, 2 +; PWR9-NEXT: stxv 34, 16(5) +; PWR9-NEXT: addi 5, 5, 16 +; PWR9-NEXT: bne 0, .LBB1_1 +; PWR9-NEXT: # %bb.2: # %for.end +; PWR9-NEXT: blr +; +; PWR8-LABEL: fpext_ctrloop_fp128: +; PWR8: # %bb.0: # %entry +; PWR8-NEXT: mflr 0 +; PWR8-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PWR8-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; PWR8-NEXT: std 0, 16(1) +; PWR8-NEXT: stdu 1, -64(1) +; PWR8-NEXT: addis 4, 2, y@toc@ha +; PWR8-NEXT: addi 30, 3, -8 +; PWR8-NEXT: li 28, 0 +; PWR8-NEXT: addi 4, 4, y@toc@l +; PWR8-NEXT: addi 29, 4, -16 +; PWR8-NEXT: .p2align 4 +; PWR8-NEXT: .LBB1_1: # %for.body +; PWR8-NEXT: # +; PWR8-NEXT: lfdu 1, 8(30) +; PWR8-NEXT: addi 29, 29, 16 +; PWR8-NEXT: bl __extenddfkf2 +; PWR8-NEXT: nop +; PWR8-NEXT: xxswapd 0, 34 +; PWR8-NEXT: addi 28, 28, 8 +; PWR8-NEXT: cmpldi 28, 32 +; PWR8-NEXT: stxvd2x 0, 0, 29 +; PWR8-NEXT: bne 0, .LBB1_1 +; PWR8-NEXT: # %bb.2: # %for.end +; PWR8-NEXT: addi 1, 1, 64 +; PWR8-NEXT: ld 0, 16(1) +; PWR8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; PWR8-NEXT: mtlr 0 +; PWR8-NEXT: blr entry: br label %for.body @@ -45,12 +176,63 @@ for.body: for.end: ret void - -; CHECK-LABEL: fpext_ctrloop_fp128 -; CHECK-NOT: mtctr } -define void @fptrunc_ctrloop_fp128(ptr %a) { +define void @fptrunc_ctrloop_fp128(ptr %a) nounwind { +; PWR9-LABEL: fptrunc_ctrloop_fp128: +; PWR9: # %bb.0: # 
%entry +; PWR9-NEXT: addis 4, 2, x@toc@ha +; PWR9-NEXT: addi 3, 3, -8 +; PWR9-NEXT: li 5, 0 +; PWR9-NEXT: addi 4, 4, x@toc@l +; PWR9-NEXT: addi 4, 4, -16 +; PWR9-NEXT: .p2align 5 +; PWR9-NEXT: .LBB2_1: # %for.body +; PWR9-NEXT: # +; PWR9-NEXT: lxv 34, 16(4) +; PWR9-NEXT: addi 5, 5, 8 +; PWR9-NEXT: addi 4, 4, 16 +; PWR9-NEXT: cmpldi 5, 32 +; PWR9-NEXT: xscvqpdp 2, 2 +; PWR9-NEXT: xscpsgndp 0, 34, 34 +; PWR9-NEXT: stfdu 0, 8(3) +; PWR9-NEXT: bne 0, .LBB2_1 +; PWR9-NEXT: # %bb.2: # %for.end +; PWR9-NEXT: blr +; +; PWR8-LABEL: fptrunc_ctrloop_fp128: +; PWR8: # %bb.0: # %entry +; PWR8-NEXT: mflr 0 +; PWR8-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PWR8-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PWR8-NEXT: std 30, -16(1) # 8-byte Folded Spill +; PWR8-NEXT: std 0, 16(1) +; PWR8-NEXT: stdu 1, -64(1) +; PWR8-NEXT: addis 4, 2, x@toc@ha +; PWR8-NEXT: addi 30, 3, -8 +; PWR8-NEXT: li 28, 0 +; PWR8-NEXT: addi 4, 4, x@toc@l +; PWR8-NEXT: addi 29, 4, -16 +; PWR8-NEXT: .p2align 4 +; PWR8-NEXT: .LBB2_1: # %for.body +; PWR8-NEXT: # +; PWR8-NEXT: addi 29, 29, 16 +; PWR8-NEXT: lxvd2x 0, 0, 29 +; PWR8-NEXT: xxswapd 34, 0 +; PWR8-NEXT: bl __trunckfdf2 +; PWR8-NEXT: nop +; PWR8-NEXT: addi 28, 28, 8 +; PWR8-NEXT: stfdu 1, 8(30) +; PWR8-NEXT: cmpldi 28, 32 +; PWR8-NEXT: bne 0, .LBB2_1 +; PWR8-NEXT: # %bb.2: # %for.end +; PWR8-NEXT: addi 1, 1, 64 +; PWR8-NEXT: ld 0, 16(1) +; PWR8-NEXT: ld 30, -16(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 29, -24(1) # 8-byte Folded Reload +; PWR8-NEXT: ld 28, -32(1) # 8-byte Folded Reload +; PWR8-NEXT: mtlr 0 +; PWR8-NEXT: blr entry: br label %for.body @@ -67,9 +249,6 @@ for.body: for.end: ret void - -; CHECK-LABEL: fptrunc_ctrloop_fp128 -; CHECK-NOT: mtctr } declare void @obfuscate(ptr, ...) 
local_unnamed_addr #2 diff --git a/llvm/test/CodeGen/PowerPC/future-check-features.ll b/llvm/test/CodeGen/PowerPC/future-check-features.ll index 1dca4dafd5b64..f881119335d09 100644 --- a/llvm/test/CodeGen/PowerPC/future-check-features.ll +++ b/llvm/test/CodeGen/PowerPC/future-check-features.ll @@ -1,9 +1,19 @@ -; RUN: llc -mattr=pcrelative-memops,prefix-instrs,paired-vector-memops,mma,rop-protect,privileged \ +; RUN: llc -mattr=isa-future-instructions,pcrelative-memops,prefix-instrs \ +; RUN: -mattr=paired-vector-memops,mma,rop-protect,privileged \ ; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names %s -o - 2>&1 | FileCheck %s -; RUN: llc -mattr=pcrelative-memops,prefix-instrs,paired-vector-memops,mma,rop-protect,privileged \ +; RUN: llc -mattr=isa-future-instructions,pcrelative-memops,prefix-instrs \ +; RUN: -mattr=paired-vector-memops,mma,rop-protect,privileged \ ; RUN: -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names %s -o - 2>&1 | FileCheck %s +; RUN: llc -mattr=isa-future-instructions,pcrelative-memops,prefix-instrs \ +; RUN: -mattr=paired-vector-memops,mma,rop-protect,privileged \ +; RUN: -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names %s -o - 2>&1 | FileCheck %s +; RUN: llc -mattr=isa-future-instructions,pcrelative-memops,prefix-instrs \ +; RUN: -mattr=paired-vector-memops,mma,rop-protect,privileged \ +; RUN: -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names %s -o - 2>&1 | FileCheck %s define dso_local signext i32 @f() { entry: diff --git a/llvm/test/CodeGen/PowerPC/livevars-crash2.mir b/llvm/test/CodeGen/PowerPC/livevars-crash2.mir index 2f1022f13a4b1..e397567f4e582 100644 --- a/llvm/test/CodeGen/PowerPC/livevars-crash2.mir +++ b/llvm/test/CodeGen/PowerPC/livevars-crash2.mir @@ -191,7 +191,7 @@ body: | ; CHECK: %1:g8rc_and_g8rc_nox0 = COPY killed %12 ; CHECK: %5:gprc = LBZ 0, %1 :: (load (s8) 
from %ir.0) ; CHECK: %6:crrc = CMPWI killed %5, 0 - ; CEHCK: %7:crbitrc = COPY killed %6.sub_eq + ; CHECK: %7:crbitrc = COPY killed %6.sub_eq ; CHECK: %2:g8rc = nuw ADDI8 %1, 1 ; CHECK: STD %2, 0, %4 :: (store (s64) into %ir.p) ; CHECK: %8:gprc = LBZ 1, %1 :: (load (s8) from %ir.incdec.ptr) diff --git a/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir b/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir new file mode 100644 index 0000000000000..36484be012362 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/machine-cse-rm-pre.mir @@ -0,0 +1,173 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=powerpc-unknown-unknown -run-pass=machine-cse -verify-machineinstrs | FileCheck %s +--- | + define void @can_pre() { + entry: + br label %for.body + + for.body: + br i1 undef, label %if.then, label %if.else + + if.then: + br label %if.end + + if.else: + br label %if.end + + if.end: + br label %for.body + } + + define void @cannot_pre() { + entry: + br label %for.body + + for.body: + br i1 undef, label %if.then, label %if.else + + if.then: + br label %if.end + + if.else: + br label %if.end + + if.end: + br label %for.body + } +... 
+--- +name: can_pre +registers: + - { id: 0, class: f8rc, preferred-register: '' } + - { id: 1, class: f8rc, preferred-register: '' } + - { id: 2, class: gprc, preferred-register: '' } + - { id: 3, class: gprc, preferred-register: '' } + - { id: 4, class: f8rc, preferred-register: '' } + - { id: 5, class: f8rc, preferred-register: '' } +liveins: + - { reg: '$r1', virtual-reg: '%2' } + - { reg: '$r2', virtual-reg: '%3' } + - { reg: '$f1', virtual-reg: '%4' } + - { reg: '$f2', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: can_pre + ; CHECK: bb.0.for.body: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $r1, $r2, $f1, $f2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:f8rc = COPY $f2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:f8rc = COPY $f1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprc = COPY $r2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprc = COPY $r1 + ; CHECK-NEXT: $cr0 = CMPLWI [[COPY3]], 0 + ; CHECK-NEXT: %6:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: BCC 44, $cr0, %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: BLR implicit $lr, implicit $rm + bb.0.for.body: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r1, $r2, $f1, $f2 + + %5:f8rc = COPY $f2 + %4:f8rc = COPY $f1 + %3:gprc = COPY $r2 + %2:gprc = COPY $r1 + $cr0 = CMPLWI %2, 0 + BCC 44, $cr0, %bb.1 + B %bb.2 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %0:f8rc = nofpexcept FDIV %4, %5, implicit $rm + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %1:f8rc = nofpexcept FDIV %4, %5, implicit $rm + + bb.3.if.end: + BLR implicit $lr, implicit $rm +... 
+--- +name: cannot_pre +registers: + - { id: 0, class: f8rc, preferred-register: '' } + - { id: 1, class: f8rc, preferred-register: '' } + - { id: 2, class: gprc, preferred-register: '' } + - { id: 3, class: gprc, preferred-register: '' } + - { id: 4, class: f8rc, preferred-register: '' } + - { id: 5, class: f8rc, preferred-register: '' } + - { id: 6, class: f8rc, preferred-register: '' } +liveins: + - { reg: '$r1', virtual-reg: '%2' } + - { reg: '$r2', virtual-reg: '%3' } + - { reg: '$f1', virtual-reg: '%4' } + - { reg: '$f2', virtual-reg: '%5' } +body: | + ; CHECK-LABEL: name: cannot_pre + ; CHECK: bb.0.for.body: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $r1, $r2, $f1, $f2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:f8rc = COPY $f2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:f8rc = COPY $f1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprc = COPY $r2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprc = COPY $r1 + ; CHECK-NEXT: $cr0 = CMPLWI [[COPY3]], 0 + ; CHECK-NEXT: BCC 44, $cr0, %bb.1 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[SETRND:%[0-9]+]]:f8rc = SETRND [[COPY2]], implicit-def $rm, implicit $rm + ; CHECK-NEXT: %0:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %1:f8rc = nofpexcept FDIV [[COPY1]], [[COPY]], implicit $rm + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: BLR implicit $lr, implicit $rm + bb.0.for.body: + successors: %bb.1(0x40000000), %bb.2(0x40000000) + liveins: $r1, $r2, $f1, $f2 + + %5:f8rc = COPY $f2 + %4:f8rc = COPY $f1 + %3:gprc = COPY $r2 + %2:gprc = COPY $r1 + $cr0 = CMPLWI %2, 0 + BCC 44, $cr0, %bb.1 + B %bb.2 + + bb.1.if.then: + successors: %bb.3(0x80000000) + + %6:f8rc = SETRND %3, implicit-def $rm, 
implicit $rm + %0:f8rc = nofpexcept FDIV %4, %5, implicit $rm + B %bb.3 + + bb.2.if.else: + successors: %bb.3(0x80000000) + + %1:f8rc = nofpexcept FDIV %4, %5, implicit $rm + + bb.3.if.end: + BLR implicit $lr, implicit $rm +... diff --git a/llvm/test/CodeGen/PowerPC/phi-eliminate.mir b/llvm/test/CodeGen/PowerPC/phi-eliminate.mir index a8eacc201a365..a79f2586850fe 100644 --- a/llvm/test/CodeGen/PowerPC/phi-eliminate.mir +++ b/llvm/test/CodeGen/PowerPC/phi-eliminate.mir @@ -158,7 +158,7 @@ body: | ; CHECK: %21:gprc = SUBF killed %20, killed %8 ; CHECK: %22:crrc = CMPLWI %21, 10 ; CHECK: %23:gprc = ISEL %15, %14, killed %22.sub_lt - ; CEHCK: %24:gprc = ADD4 killed %23, killed %21 + ; CHECK: %24:gprc = ADD4 killed %23, killed %21 ; CHECK: %25:g8rc_and_g8rc_nox0 = STBU killed %24, -1, undef %0:g8rc_and_g8rc_nox0 :: (store (s8) into %ir.7) ; CHECK: %26:gprc = DIVW %19, %9 ; CHECK: %57:gprc = COPY killed %26 diff --git a/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll index af24164496ffd..0a448a934d74a 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll @@ -50,7 +50,7 @@ entry: ret void } ; CHECK: @caller2 -; CHECK: std {{[0-9]+}}, 16(1) +; CHECK: stw {{[0-9]+}}, 156(1) ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll index fa2e09a8e4b6b..39948fec8150b 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll @@ -50,7 +50,7 @@ entry: ret void } ; CHECK: @caller2 -; CHECK: std {{[0-9]+}}, 16({{[0-9]+}}) +; CHECK: stw {{[0-9]+}}, 136({{[0-9]+}}) ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) diff --git a/llvm/test/CodeGen/PowerPC/vsx-args.ll 
b/llvm/test/CodeGen/PowerPC/vsx-args.ll index 8cd2dbfde2795..e8137fa6bebb5 100644 --- a/llvm/test/CodeGen/PowerPC/vsx-args.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-args.ll @@ -28,7 +28,7 @@ entry: ; CHECK-FISL: vmr 4, 3 ; CHECK-FISL: lxvd2x 35, 1, 3 ; CHECK-FISL: 3, 144 -; CHCEK-FISL: stxvd2x 36, 1, 3 +; CHECK-FISL: stxvd2x 36, 1, 3 ; CHECK-FISL: vmr 4, 2 ; CHECK-FISL: bl sv diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index e9161ae8d69d0..6fa4e37782b37 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -1408,6 +1408,33 @@ define signext i16 @srliw_1_sh1add(i16* %0, i32 signext %1) { ret i16 %6 } +define i128 @slliuw_ptrdiff(i64 %diff, i128* %baseptr) { +; RV64I-LABEL: slliuw_ptrdiff: +; RV64I: # %bb.0: +; RV64I-NEXT: li a2, 1 +; RV64I-NEXT: slli a2, a2, 36 +; RV64I-NEXT: addi a2, a2, -16 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: add a1, a1, a0 +; RV64I-NEXT: ld a0, 0(a1) +; RV64I-NEXT: ld a1, 8(a1) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: slliuw_ptrdiff: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: srli a0, a0, 4 +; RV64ZBA-NEXT: slli.uw a0, a0, 4 +; RV64ZBA-NEXT: add a1, a1, a0 +; RV64ZBA-NEXT: ld a0, 0(a1) +; RV64ZBA-NEXT: ld a1, 8(a1) +; RV64ZBA-NEXT: ret + %ptrdiff = lshr exact i64 %diff, 4 + %cast = and i64 %ptrdiff, 4294967295 + %ptr = getelementptr inbounds i128, i128* %baseptr, i64 %cast + %res = load i128, i128* %ptr + ret i128 %res +} + define signext i32 @srliw_2_sh2add(i32* %0, i32 signext %1) { ; RV64I-LABEL: srliw_2_sh2add: ; RV64I: # %bb.0: diff --git a/llvm/test/CodeGen/SPARC/64bit.ll b/llvm/test/CodeGen/SPARC/64bit.ll index c61476eb2265d..c079d901a03d1 100644 --- a/llvm/test/CodeGen/SPARC/64bit.ll +++ b/llvm/test/CodeGen/SPARC/64bit.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=sparcv9 -mattr=+popc -disable-sparc-delay-filler -disable-sparc-leaf-proc | FileCheck %s ; RUN: llc < %s -march=sparcv9 -mattr=+popc | FileCheck %s -check-prefix=OPT +; RUN: llc %s -march=sparcv9 
-mattr=+popc -filetype=null ; CHECK-LABEL: ret2: ; CHECK: mov %i1, %i0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll index 040e026e6a80a..f0495def81858 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll @@ -277,23 +277,23 @@ for.body: ; preds = %entry, %for.body define void @test_memset_preheader(i8* %x, i8* %y, i32 %n) { ; CHECK-LABEL: test_memset_preheader: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cbz r2, .LBB6_5 ; CHECK-NEXT: @ %bb.1: @ %prehead ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: wlstp.8 lr, r2, .LBB6_3 ; CHECK-NEXT: .LBB6_2: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vstrb.8 q0, [r12], #16 +; CHECK-NEXT: vstrb.8 q0, [r4], #16 ; CHECK-NEXT: letp lr, .LBB6_2 ; CHECK-NEXT: .LBB6_3: @ %prehead ; CHECK-NEXT: dls lr, r2 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: mov r3, r0 ; CHECK-NEXT: .LBB6_4: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrb r3, [r12], #1 -; CHECK-NEXT: strb r3, [r1], #1 +; CHECK-NEXT: ldrb r4, [r3], #1 +; CHECK-NEXT: strb r4, [r1], #1 ; CHECK-NEXT: le lr, .LBB6_4 ; CHECK-NEXT: .LBB6_5: @ %for.cond.cleanup ; CHECK-NEXT: vmov.i32 q0, #0x0 @@ -302,7 +302,7 @@ define void @test_memset_preheader(i8* %x, i8* %y, i32 %n) { ; CHECK-NEXT: vstrb.8 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB6_6 ; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, pc} entry: %cmp6 = icmp ne i32 %n, 0 br i1 %cmp6, label %prehead, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-and.ll b/llvm/test/CodeGen/Thumb2/mve-pred-and.ll index e745fafdbea72..e8d5eadabf7f9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-and.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-pred-and.ll @@ -122,8 +122,9 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vpt.i32 eq, q0, zr -; CHECK-NEXT: vcmpt.u32 cs, q1, zr +; CHECK-NEXT: vcmpt.u32 cs, q2, q1 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll index cb3f554e21b0a..435ddf0a6e57b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll @@ -123,7 +123,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q1, zr +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q2, q1 ; CHECK-NEXT: vpnot ; CHECK-NEXT: vpst ; CHECK-NEXT: vcmpt.i32 ne, q0, zr diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll index e5fef332034fe..0ff262e6b53ab 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll @@ -151,7 +151,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q1, zr +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q2, q1 ; CHECK-NEXT: vmrs r0, p0 ; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vmrs r1, p0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll index fcb9d136307fe..aaf49c76a07a0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpz.ll @@ -110,7 +110,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: vcmp_ulez_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q0, zr +; CHECK-NEXT: 
vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -229,7 +230,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: vcmp_ulez_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u16 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u16 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -348,7 +350,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: vcmp_ulez_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u8 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u8 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -489,7 +492,8 @@ entry: define arm_aapcs_vfpcc <4 x i32> @vcmp_r_ugez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: vcmp_r_ugez_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u32 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u32 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -608,7 +612,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @vcmp_r_ugez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: vcmp_r_ugez_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u16 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u16 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -727,7 +732,8 @@ entry: define arm_aapcs_vfpcc <16 x i8> @vcmp_r_ugez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: vcmp_r_ugez_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vcmp.u8 cs, q0, zr +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.u8 cs, q3, q0 ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/WebAssembly/PR41149.ll b/llvm/test/CodeGen/WebAssembly/PR41149.ll index 428f84979d89e..d18bd9c4a3b88 100644 --- 
a/llvm/test/CodeGen/WebAssembly/PR41149.ll +++ b/llvm/test/CodeGen/WebAssembly/PR41149.ll @@ -13,9 +13,8 @@ define void @mod() { ; CHECK-NEXT: i32.load8_u 0 ; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i32.const 24 -; CHECK-NEXT: i32.shl -; CHECK-NEXT: i32.const 31 +; CHECK-NEXT: i32.extend8_s +; CHECK-NEXT: i32.const 7 ; CHECK-NEXT: i32.shr_s ; CHECK-NEXT: local.tee 0 ; CHECK-NEXT: i32.xor diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll index f739f08f70715..4ccc95c8f4928 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM ; Test that basic bulk memory codegen works correctly diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll index eaf9a9659429e..88cf6b58c0732 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt 
-wasm-disable-explicit-locals -wasm-keep-registers -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM ; Test that basic bulk memory codegen works correctly diff --git a/llvm/test/CodeGen/WebAssembly/byval.ll b/llvm/test/CodeGen/WebAssembly/byval.ll index 5f0a71960b677..5a42f3b9438a0 100644 --- a/llvm/test/CodeGen/WebAssembly/byval.ll +++ b/llvm/test/CodeGen/WebAssembly/byval.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -fast-isel | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -mcpu=mvp | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -mcpu=mvp -fast-isel | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/conv-trap.ll b/llvm/test/CodeGen/WebAssembly/conv-trap.ll index 0906743374b93..4402880a8c383 100644 --- a/llvm/test/CodeGen/WebAssembly/conv-trap.ll 
+++ b/llvm/test/CodeGen/WebAssembly/conv-trap.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-nontrapping-fptoint | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-nontrapping-fptoint | FileCheck %s ; Test that basic conversion operations assemble as expected using ; the trapping opcodes and explicit code to suppress the trapping. diff --git a/llvm/test/CodeGen/WebAssembly/fast-isel-noreg.ll b/llvm/test/CodeGen/WebAssembly/fast-isel-noreg.ll index 1bc87d7c82eed..919ac6815717d 100644 --- a/llvm/test/CodeGen/WebAssembly/fast-isel-noreg.ll +++ b/llvm/test/CodeGen/WebAssembly/fast-isel-noreg.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -asm-verbose=false -wasm-keep-registers -fast-isel -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-keep-registers -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mcpu=mvp -wasm-keep-registers -fast-isel -verify-machineinstrs | FileCheck %s ; Test that FastISel does not generate instructions with NoReg diff --git a/llvm/test/CodeGen/WebAssembly/global.ll b/llvm/test/CodeGen/WebAssembly/global.ll index c3f6b2e23e4a4..dc9b909dc7ea4 100644 --- a/llvm/test/CodeGen/WebAssembly/global.ll +++ b/llvm/test/CodeGen/WebAssembly/global.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=-atomics | FileCheck %s -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics | FileCheck %s +; RUN: llc < %s -asm-verbose=false 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-atomics | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+atomics | FileCheck %s ; Test that globals assemble as expected. diff --git a/llvm/test/CodeGen/WebAssembly/legalize.ll b/llvm/test/CodeGen/WebAssembly/legalize.ll index 686bd23c9ff54..cd7d719bff11a 100644 --- a/llvm/test/CodeGen/WebAssembly/legalize.ll +++ b/llvm/test/CodeGen/WebAssembly/legalize.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test various types and operators that need to be legalized. diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll index 446c298865ade..beb1b6d7ec8d5 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll @@ -109,7 +109,7 @@ catch: ; preds = %catch.start catchret from %2 to label %catchret.dest ; CHECK: catch: ; preds = %catch.start ; CHECK-NEXT: %exn = load i8*, i8** %exn.slot15, align 4 -; CHECK-NEXT: %5 = call i8* @__cxa_begin_catch(i8* %exn) #2 [ "funclet"(token %2) ] +; CHECK-NEXT: %5 = call i8* @__cxa_begin_catch(i8* %exn) #7 [ "funclet"(token %2) ] ; CHECK-NEXT: invoke void @__cxa_end_catch() [ "funclet"(token %2) ] ; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll index 8491e246c7f66..d9ceb86f208d8 100644 --- a/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s 
-asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %s +; RUN: llc < %s -asm-verbose=false -mcpu=mvp -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -tail-dup-placement=0 | FileCheck %s ; Test memcpy, memmove, and memset intrinsics. diff --git a/llvm/test/CodeGen/WebAssembly/memory64-feature.ll b/llvm/test/CodeGen/WebAssembly/memory64-feature.ll index 53fccfe088883..bd277dfdc37d3 100644 --- a/llvm/test/CodeGen/WebAssembly/memory64-feature.ll +++ b/llvm/test/CodeGen/WebAssembly/memory64-feature.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -mcpu=mvp < %s | FileCheck %s ; Test that wasm64 is properly emitted into the target features section diff --git a/llvm/test/CodeGen/WebAssembly/multivalue.ll b/llvm/test/CodeGen/WebAssembly/multivalue.ll index a0f36ea8265e0..0080052e8f7a0 100644 --- a/llvm/test/CodeGen/WebAssembly/multivalue.ll +++ b/llvm/test/CodeGen/WebAssembly/multivalue.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -mattr=+multivalue,+tail-call | FileCheck %s -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -mattr=+reference-types,+multivalue,+tail-call | FileCheck --check-prefix REF %s -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+multivalue,+tail-call | FileCheck %s --check-prefix REGS -; RUN: llc < %s --filetype=obj -mattr=+multivalue,+tail-call | obj2yaml | FileCheck %s --check-prefix OBJ +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -mcpu=mvp -mattr=+multivalue,+tail-call | FileCheck %s +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -mcpu=mvp -mattr=+reference-types,+multivalue,+tail-call | FileCheck --check-prefix REF %s +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt 
-wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+multivalue,+tail-call | FileCheck %s --check-prefix REGS +; RUN: llc < %s --filetype=obj -mcpu=mvp -mattr=+multivalue,+tail-call | obj2yaml | FileCheck %s --check-prefix OBJ ; Test that the multivalue calls, returns, function types, and block ; types work as expected. diff --git a/llvm/test/CodeGen/WebAssembly/mutable-globals.ll b/llvm/test/CodeGen/WebAssembly/mutable-globals.ll index df698c1a11b82..93962f7e6d92c 100644 --- a/llvm/test/CodeGen/WebAssembly/mutable-globals.ll +++ b/llvm/test/CodeGen/WebAssembly/mutable-globals.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+mutable-globals | FileCheck %s +; RUN: llc < %s -mcpu=mvp -mattr=+mutable-globals | FileCheck %s ; Test that mutable globals is properly emitted into the target features section diff --git a/llvm/test/CodeGen/WebAssembly/reference-types.ll b/llvm/test/CodeGen/WebAssembly/reference-types.ll index d56541db8572e..168aaec8f0943 100644 --- a/llvm/test/CodeGen/WebAssembly/reference-types.ll +++ b/llvm/test/CodeGen/WebAssembly/reference-types.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+reference-types | FileCheck %s +; RUN: llc < %s -mcpu=mvp -mattr=+reference-types | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/signext-zeroext.ll b/llvm/test/CodeGen/WebAssembly/signext-zeroext.ll index f5dbfeb78c1db..1dafbe58a7f8c 100644 --- a/llvm/test/CodeGen/WebAssembly/signext-zeroext.ll +++ b/llvm/test/CodeGen/WebAssembly/signext-zeroext.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -mcpu=mvp -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test zeroext and signext ABI keywords diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll index 
f945b8e061717..8459ec8101ff2 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mcpu=mvp -mattr=+simd128 | FileCheck %s ; Test that vector float-to-int and int-to-float instructions lower correctly diff --git a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll index c4b94381e39a6..45080d14dfd29 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-sext-inreg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mcpu=mvp -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -wasm-keep-registers -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals | FileCheck %s --check-prefixes CHECK,NO-SIMD128 ; Test that vector sign extensions lower to shifts diff --git a/llvm/test/CodeGen/WebAssembly/tailcall.ll b/llvm/test/CodeGen/WebAssembly/tailcall.ll index d0f39e6416ba8..3d96c666ddc58 100644 --- a/llvm/test/CodeGen/WebAssembly/tailcall.ll +++ b/llvm/test/CodeGen/WebAssembly/tailcall.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt 
-wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s +; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s ; RUN: llc < %s --filetype=obj -mattr=+tail-call | obj2yaml | FileCheck --check-prefix=YAML %s ; Test that the tail calls lower correctly diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll index 57d14053f3342..45bc06b5d5c96 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes NO-BULK-MEM -; RUN: llc < %s -mattr=+bulk-memory,atomics | FileCheck %s --check-prefixes BULK-MEM +; RUN: llc < %s -mcpu=mvp -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes NO-BULK-MEM +; RUN: llc < %s -mcpu=mvp -mattr=+bulk-memory,atomics | FileCheck %s --check-prefixes BULK-MEM ; Test that the target features section contains -atomics or +atomics ; for modules that have thread local storage in their source. 
diff --git a/llvm/test/CodeGen/WebAssembly/target-features.ll b/llvm/test/CodeGen/WebAssembly/target-features.ll index ecb49766659b0..4debf66fe0f7a 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s | FileCheck %s --check-prefixes CHECK,ATTRS -; RUN: llc < %s -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 +; RUN: llc < %s -mcpu=mvp -mattr=+simd128 | FileCheck %s --check-prefixes CHECK,SIMD128 ; RUN: llc < %s -mcpu=bleeding-edge | FileCheck %s --check-prefixes CHECK,BLEEDING-EDGE ; Test that codegen emits target features from the command line or @@ -55,17 +55,22 @@ attributes #2 = { "target-features"="+reference-types" } ; CHECK-LABEL: .custom_section.target_features,"",@ -; +atomics, +nontrapping-fptoint, +reference-types -; ATTRS-NEXT: .int8 3 -; ATTRS-NEXT: .int8 43 -; ATTRS-NEXT: .int8 7 -; ATTRS-NEXT: .ascii "atomics" -; ATTRS-NEXT: .int8 43 -; ATTRS-NEXT: .int8 19 -; ATTRS-NEXT: .ascii "nontrapping-fptoint" -; ATTRS-NEXT: .int8 43 -; ATTRS-NEXT: .int8 15 -; ATTRS-NEXT: .ascii "reference-types" +; +atomics, +reference-types, +mutable-globals +; ATTRS-NEXT: .int8 5 +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 7 +; ATTRS-NEXT: .ascii "atomics" +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 15 +; ATTRS-NEXT: .ascii "mutable-globals" +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 19 +; ATTRS-NEXT: .ascii "nontrapping-fptoint" +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 15 +; ATTRS-NEXT: .ascii "reference-types" +; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 8 ; +atomics, +nontrapping-fptoint, +reference-types, +simd128 ; SIMD128-NEXT: .int8 4 @@ -109,5 +114,3 @@ attributes #2 = { "target-features"="+reference-types" } ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 9 ; BLEEDING-EDGE-NEXT: .ascii "tail-call" - -; CHECK-NEXT: .text diff --git a/llvm/test/CodeGen/X86/atom-pad-short-functions.ll b/llvm/test/CodeGen/X86/atom-pad-short-functions.ll index 
454fb32596c8b..430efc7b51303 100644 --- a/llvm/test/CodeGen/X86/atom-pad-short-functions.ll +++ b/llvm/test/CodeGen/X86/atom-pad-short-functions.ll @@ -1,64 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -opaque-pointers < %s -O1 -mcpu=atom -mtriple=i686-linux | FileCheck %s declare void @external_function(...) define i32 @test_return_val(i32 %a) nounwind { -; CHECK: test_return_val -; CHECK: movl -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret +; CHECK-LABEL: test_return_val: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl ret i32 %a } define i32 @test_optsize(i32 %a) nounwind optsize { -; CHECK: test_optsize -; CHECK: movl -; CHECK-NEXT: ret +; CHECK-LABEL: test_optsize: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl ret i32 %a } define i32 @test_minsize(i32 %a) nounwind minsize { -; CHECK: test_minsize -; CHECK: movl -; CHECK-NEXT: ret +; CHECK-LABEL: test_minsize: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl ret i32 %a } define i32 @test_pgso(i32 %a) nounwind !prof !14 { -; CHECK: test_pgso -; CHECK: movl -; CHECK-NEXT: ret +; CHECK-LABEL: test_pgso: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: retl ret i32 %a } define i32 @test_add(i32 %a, i32 %b) nounwind { -; CHECK: test_add -; CHECK: addl -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret +; CHECK-LABEL: test_add: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: addl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl %result = add i32 %a, %b ret i32 %result } define i32 @test_multiple_ret(i32 %a, i32 %b, i1 %c) nounwind { -; CHECK: 
@test_multiple_ret -; CHECK: je +; CHECK-LABEL: test_multiple_ret: +; CHECK: # %bb.0: +; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB5_2 +; CHECK-NEXT: # %bb.1: # %bb1 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl +; CHECK-NEXT: .LBB5_2: # %bb2 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl -; CHECK: nop -; CHECK: nop -; CHECK: ret -; CHECK: nop -; CHECK: nop -; CHECK: ret br i1 %c, label %bb1, label %bb2 @@ -69,41 +82,52 @@ bb2: ret i32 %b } -define void @test_call_others(i32 %x) nounwind -{ -; CHECK: test_call_others -; CHECK: je +define void @test_call_others(i32 %x) nounwind { +; CHECK-LABEL: test_call_others: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: je .LBB6_1 +; CHECK-NEXT: # %bb.2: # %true.case +; CHECK-NEXT: jmp external_function@PLT # TAILCALL +; CHECK-NEXT: .LBB6_1: # %if.end +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl %tobool = icmp eq i32 %x, 0 br i1 %tobool, label %if.end, label %true.case -; CHECK: jmp external_function true.case: tail call void @external_function() nounwind br label %if.end -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: ret if.end: ret void } define void @test_branch_to_same_bb(i32 %x, i32 %y) nounwind { -; CHECK: @test_branch_to_same_bb +; CHECK-LABEL: test_branch_to_same_bb: +; CHECK: # %bb.0: +; CHECK-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: jle .LBB7_2 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB7_1: # %while.cond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB7_1 +; CHECK-NEXT: .LBB7_2: # %while.end +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: retl %cmp = icmp sgt i32 %x, 0 br i1 %cmp, label %while.cond, label %while.end while.cond: br label %while.cond -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: nop -; CHECK: 
ret while.end: ret void } diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll index f6be71d26100e..e4e24903319c4 100644 --- a/llvm/test/CodeGen/X86/cpus-intel.ll +++ b/llvm/test/CodeGen/X86/cpus-intel.ll @@ -17,6 +17,8 @@ ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lakemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty @@ -52,6 +54,8 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty define void @foo() { ret void diff --git 
a/llvm/test/CodeGen/X86/statepoint-split-single-block.ll b/llvm/test/CodeGen/X86/statepoint-split-single-block.ll new file mode 100644 index 0000000000000..61368d3101ee6 --- /dev/null +++ b/llvm/test/CodeGen/X86/statepoint-split-single-block.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -max-registers-for-gc-values=256 -use-registers-for-deopt-values=true -code-model=large -fixup-allow-gcptr-in-csr=true < %s | FileCheck %s + +; The test checks that Greedy register allocator should not split single basic block +; if it has only one non-statepoint use. Otherwise we may get a redundant register usage. + +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-win64" + +define ptr addrspace(1) @foo(ptr addrspace(1) %arg) gc "statepoint-example" { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset %rbx, -56 +; CHECK-NEXT: .cfi_offset %r12, -48 +; CHECK-NEXT: .cfi_offset %r13, -40 +; CHECK-NEXT: .cfi_offset %r14, -32 +; CHECK-NEXT: .cfi_offset %r15, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rdi, (%rsp) # 8-byte Spill +; CHECK-NEXT: movabsq $nocsr, %rax +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: movabsq $bar, %rax +; CHECK-NEXT: movq (%rsp), %rbx # 8-byte Reload +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: movq %rbx, (%rsp) # 8-byte Spill +; CHECK-NEXT: movabsq $nocsr, %rax +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: movq (%rsp), %rax # 8-byte Reload +; 
CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 56 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + ; force spill %arg on stack. + call void @nocsr() + br label %do_call + +do_call: + ; Basic block with two use instructions inside: + ; copy to rdi as an argument to call and use in statepoint instruction as gc-live and deopt value. + %statepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(void (ptr addrspace(1))) @bar, i32 1, i32 0, ptr addrspace(1) %arg, i32 0, i32 0) [ "deopt"(ptr addrspace(1) %arg), "gc-live"(ptr addrspace(1) %arg) ] + %arg.reloc = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %statepoint_token, i32 0, i32 0) + br label %next + +next: + ; force spill %arg.reloc on stack. + call void @nocsr() + ret ptr addrspace(1) %arg.reloc +} + +declare void @nocsr() "no_callee_saved_registers" +declare void @bar(ptr addrspace(1)) +declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...) 
+declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32 immarg, i32 immarg) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll new file mode 100644 index 0000000000000..2cc5452fe7d2a --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/distinct.ll @@ -0,0 +1,9 @@ +; RUN: not opt -S %s -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. + +; CHECK: error: missing 'distinct', required for !DIAssignID() + +!1 = !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll new file mode 100644 index 0000000000000..d0f447ee200b6 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/instruction-type.ll @@ -0,0 +1,36 @@ +; RUN: opt -S %s -verify -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; NOTE: Expect opt to return zero because the badly formed debug info +;; is going to be stripped. + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. + +;; Check verifier output. +; CHECK: !DIAssignID attached to unexpected instruction kind + +;; Check DIAssignID is stripped from IR. 
+; CHECK: define dso_local void @fun() { +; CHECK-NOT: DIAssignID + +define dso_local void @fun() !dbg !7 { +entry: + ret void, !DIAssignID !14 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!14 = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll new file mode 100644 index 0000000000000..79adcb9ce2d12 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/operands.ll @@ -0,0 +1,9 @@ +; RUN: not opt -S %s -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. 
+ +; CHECK: error: expected ')' here + +!1 = distinct !DIAssignID(0) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll new file mode 100644 index 0000000000000..808636a1a0e58 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll @@ -0,0 +1,115 @@ +; RUN: opt %s -verify -experimental-assignment-tracking \ +; RUN: | opt -verify -S -experimental-assignment-tracking \ +; RUN: | FileCheck %s + +;; Roundtrip test (text -> bitcode -> text) for DIAssignID metadata and +;; llvm.dbg.assign intrinsics. + +;; DIAssignID attachment only. +; CHECK-LABEL: @fun() +; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID1:[0-9]+]] +define dso_local void @fun() !dbg !7 { +entry: + %local = alloca i32, align 4, !DIAssignID !14 + ret void, !dbg !13 +} + +;; Unlinked llvm.dbg.assign. +; CHECK-DAG: @fun2() +; CHECK: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR2:[0-9]+]], metadata !DIExpression(), metadata ![[ID2:[0-9]+]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG2:[0-9]+]] +define dso_local void @fun2() !dbg !15 { +entry: + %local = alloca i32, align 4 + call void @llvm.dbg.assign(metadata i32 undef, metadata !16, metadata !DIExpression(), metadata !18, metadata i32 undef, metadata !DIExpression()), !dbg !17 + ret void, !dbg !17 +} + +;; An llvm.dbg.assign linked to an alloca. 
+; CHECK-LABEL: @fun3() +; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID3:[0-9]+]] +; CHECK-NEXT: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR3:[0-9]+]], metadata !DIExpression(), metadata ![[ID3]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG3:[0-9]+]] +define dso_local void @fun3() !dbg !19 { +entry: + %local = alloca i32, align 4, !DIAssignID !22 + call void @llvm.dbg.assign(metadata i32 undef, metadata !20, metadata !DIExpression(), metadata !22, metadata i32 undef, metadata !DIExpression()), !dbg !21 + ret void, !dbg !21 +} + +;; Check that using a DIAssignID as an operand before using it as an attachment +;; works (the order of the alloca and dbg.assign has been swapped). +; CHECK-LABEL: @fun4() +; CHECK: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR4:[0-9]+]], metadata !DIExpression(), metadata ![[ID4:[0-9]+]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG4:[0-9]+]] +; CHECK-NEXT: %local = alloca i32, align 4, !DIAssignID ![[ID4]] +define dso_local void @fun4() !dbg !23 { +entry: + call void @llvm.dbg.assign(metadata i32 undef, metadata !24, metadata !DIExpression(), metadata !26, metadata i32 undef, metadata !DIExpression()), !dbg !25 + %local = alloca i32, align 4, !DIAssignID !26 + ret void, !dbg !25 +} + +;; Check that the value and address operands print correctly. +;; There are currently no plans to support DIArgLists for the address component. 
+; CHECK-LABEL: @fun5 +; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID5:[0-9]+]] +; CHECK-NEXT: llvm.dbg.assign(metadata i32 %v, metadata ![[VAR5:[0-9]+]], metadata !DIExpression(), metadata ![[ID5]], metadata i32* %local, metadata !DIExpression()), !dbg ![[DBG5:[0-9]+]] +; CHECK-NEXT: llvm.dbg.assign(metadata !DIArgList(i32 %v, i32 1), metadata ![[VAR5]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_stack_value), metadata ![[ID5]], metadata i32* %local, metadata !DIExpression()), !dbg ![[DBG5]] +define dso_local void @fun5(i32 %v) !dbg !27 { +entry: + %local = alloca i32, align 4, !DIAssignID !30 + call void @llvm.dbg.assign(metadata i32 %v, metadata !28, metadata !DIExpression(), metadata !30, metadata i32* %local, metadata !DIExpression()), !dbg !29 + call void @llvm.dbg.assign(metadata !DIArgList(i32 %v, i32 1), metadata !28, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_stack_value), metadata !30, metadata i32* %local, metadata !DIExpression()), !dbg !29 + ret void +} + +; CHECK-DAG: ![[ID1]] = distinct !DIAssignID() +; CHECK-DAG: ![[ID2]] = distinct !DIAssignID() +; CHECK-DAG: ![[VAR2]] = !DILocalVariable(name: "local2", +; CHECK-DAG: ![[DBG2]] = !DILocation(line: 2 +; CHECK-DAG: ![[ID3]] = distinct !DIAssignID() +; CHECK-DAG: ![[VAR3]] = !DILocalVariable(name: "local3", +; CHECK-DAG: ![[DBG3]] = !DILocation(line: 3, +; CHECK-DAG: ![[ID4]] = distinct !DIAssignID() +; CHECK-DAG: ![[VAR4]] = !DILocalVariable(name: "local4", +; CHECK-DAG: ![[DBG4]] = !DILocation(line: 4, +; CHECK-DAG: ![[ID5]] = distinct !DIAssignID() +; CHECK-DAG: ![[VAR5]] = !DILocalVariable(name: "local5", +; CHECK-DAG: ![[DBG5]] = !DILocation(line: 5, + +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 
14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocalVariable(name: "local", scope: !7, file: !1, line: 2, type: !11) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !DILocation(line: 1, column: 1, scope: !7) +!14 = distinct !DIAssignID() +!15 = distinct !DISubprogram(name: "fun2", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!16 = !DILocalVariable(name: "local2", scope: !15, file: !1, line: 2, type: !11) +!17 = !DILocation(line: 2, column: 1, scope: !15) +!18 = distinct !DIAssignID() +!19 = distinct !DISubprogram(name: "fun3", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!20 = !DILocalVariable(name: "local3", scope: !19, file: !1, line: 2, type: !11) +!21 = !DILocation(line: 3, column: 1, scope: !19) +!22 = distinct !DIAssignID() +!23 = distinct !DISubprogram(name: "fun4", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!24 = !DILocalVariable(name: "local4", scope: !23, file: !1, line: 2, type: !11) +!25 = !DILocation(line: 4, column: 1, scope: !23) +!26 = distinct !DIAssignID() +!27 = distinct !DISubprogram(name: "fun5", scope: !1, file: !1, line: 1, type: !31, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!28 = !DILocalVariable(name: "local5", scope: !27, file: !1, line: 2, type: !11) +!29 = !DILocation(line: 
5, column: 1, scope: !27) +!30 = distinct !DIAssignID() +!31 = !DISubroutineType(types: !32) +!32 = !{null, !11} diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll new file mode 100644 index 0000000000000..577289604d536 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/verify.ll @@ -0,0 +1,52 @@ +; RUN: opt %s -S -verify -experimental-assignment-tracking 2>&1 \ +; RUN: | FileCheck %s + +;; Check that badly formed assignment tracking metadata is caught either +;; while parsing or by the verifier. +;; +;; Checks for this one are inline. + +define dso_local void @fun() !dbg !7 { +entry: + %a = alloca i32, align 4, !DIAssignID !14 + ;; Here something other than a dbg.assign intrinsic is using a DIAssignID. + ; CHECK: !DIAssignID should only be used by llvm.dbg.assign intrinsics + call void @llvm.dbg.value(metadata !14, metadata !10, metadata !DIExpression()), !dbg !13 + + ;; Each following dbg.assign has an argument of the incorrect type. 
+ ; CHECK: invalid llvm.dbg.assign intrinsic address/value + call void @llvm.dbg.assign(metadata !3, metadata !10, metadata !DIExpression(), metadata !14, metadata i32* undef, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic variable + call void @llvm.dbg.assign(metadata i32 0, metadata !2, metadata !DIExpression(), metadata !14, metadata i32* undef, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic expression + call void @llvm.dbg.assign(metadata !14, metadata !10, metadata !2, metadata !14, metadata i32* undef, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic DIAssignID + call void @llvm.dbg.assign(metadata !14, metadata !10, metadata !DIExpression(), metadata !2, metadata i32* undef, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic address + call void @llvm.dbg.assign(metadata !14, metadata !10, metadata !DIExpression(), metadata !14, metadata !3, metadata !DIExpression()), !dbg !13 + ; CHECK: invalid llvm.dbg.assign intrinsic address expression + call void @llvm.dbg.assign(metadata !14, metadata !10, metadata !DIExpression(), metadata !14, metadata i32* undef, metadata !2), !dbg !13 + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 14.0.0"} +!7 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 1, type: !8, 
scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !DILocalVariable(name: "local", scope: !7, file: !1, line: 2, type: !11) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !DILocation(line: 1, column: 1, scope: !7) +!14 = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll b/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll new file mode 100644 index 0000000000000..372fc31681943 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll @@ -0,0 +1,103 @@ +; RUN: opt -S %s -passes=loop-deletion | FileCheck %s + +;; Generated from this C source: +;; static int f(int p) { return p * p * 2; } +;; static int zero() { return 0; } +;; void fun() { +;; for (int __attribute__((nodebug)) i = zero(); i < 0; ++i) { +;; f(i); +;; f(i + 1); +;; } +;; } +;; +;; Check that loop-deletion doesn't accidentally mistake debug intrinsics for +;; different inlined instances of a variable as the same variable. 
+ +; CHECK-LABEL: for.cond.cleanup: ; preds = %entry +; CHECK-NEXT: @llvm.dbg.value({{.+}}, metadata ![[P:[0-9]+]],{{.+}}), !dbg ![[DBG1:[0-9]+]] +; CHECK-NEXT: @llvm.dbg.value({{.+}}, metadata ![[P]], {{.+}}), !dbg ![[DBG2:[0-9]+]] + +; CHECK-DAG: ![[P]] = !DILocalVariable(name: "p", +; CHECK-DAG: ![[DBG1]] = !DILocation({{.+}}, inlinedAt: ![[IA1:[0-9]+]]) +; CHECK-DAG: ![[DBG2]] = !DILocation({{.+}}, inlinedAt: ![[IA2:[0-9]+]]) +; CHECK-DAG: ![[IA1]] = distinct !DILocation(line: 5, +; CHECK-DAG: ![[IA2]] = distinct !DILocation(line: 6, + +define dso_local void @fun() !dbg !9 { +entry: + br label %for.cond, !dbg !13 + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ], !dbg !15 + %cmp = icmp slt i32 %i.0, 0, !dbg !16 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !18 + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.body: ; preds = %for.cond + call void @llvm.dbg.value(metadata i32 %i.0, metadata !19, metadata !DIExpression()), !dbg !25 + %mul.i = mul nsw i32 %i.0, %i.0, !dbg !28 + %mul1.i = mul nsw i32 %mul.i, 2, !dbg !29 + %add = add nsw i32 %i.0, 1, !dbg !30 + call void @llvm.dbg.value(metadata i32 %add, metadata !19, metadata !DIExpression()), !dbg !31 + %mul.i1 = mul nsw i32 %add, %add, !dbg !33 + %mul1.i2 = mul nsw i32 %mul.i1, 2, !dbg !34 + br label %for.inc, !dbg !35 + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1, !dbg !36 + br label %for.cond, !dbg !37, !llvm.loop !38 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !41 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 16.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "/") +!2 = !{i32 7, !"Dwarf Version", 
i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{!"clang version 16.0.0"} +!9 = distinct !DISubprogram(name: "fun", scope: !1, file: !1, line: 3, type: !10, scopeLine: 3, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!10 = !DISubroutineType(types: !11) +!11 = !{null} +!12 = !{} +!13 = !DILocation(line: 4, column: 8, scope: !14) +!14 = distinct !DILexicalBlock(scope: !9, file: !1, line: 4, column: 3) +!15 = !DILocation(line: 4, scope: !14) +!16 = !DILocation(line: 4, column: 51, scope: !17) +!17 = distinct !DILexicalBlock(scope: !14, file: !1, line: 4, column: 3) +!18 = !DILocation(line: 4, column: 3, scope: !14) +!19 = !DILocalVariable(name: "p", arg: 1, scope: !20, file: !1, line: 1, type: !23) +!20 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !21, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !24) +!21 = !DISubroutineType(types: !22) +!22 = !{!23, !23} +!23 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!24 = !{!19} +!25 = !DILocation(line: 0, scope: !20, inlinedAt: !26) +!26 = distinct !DILocation(line: 5, column: 5, scope: !27) +!27 = distinct !DILexicalBlock(scope: !17, file: !1, line: 4, column: 61) +!28 = !DILocation(line: 1, column: 32, scope: !20, inlinedAt: !26) +!29 = !DILocation(line: 1, column: 36, scope: !20, inlinedAt: !26) +!30 = !DILocation(line: 6, column: 8, scope: !27) +!31 = !DILocation(line: 0, scope: !20, inlinedAt: !32) +!32 = distinct !DILocation(line: 6, column: 5, scope: !27) +!33 = !DILocation(line: 1, column: 32, scope: !20, inlinedAt: !32) +!34 = !DILocation(line: 1, column: 36, scope: !20, inlinedAt: !32) +!35 = !DILocation(line: 7, column: 3, scope: !27) +!36 = 
!DILocation(line: 4, column: 56, scope: !17) +!37 = !DILocation(line: 4, column: 3, scope: !17) +!38 = distinct !{!38, !18, !39, !40} +!39 = !DILocation(line: 7, column: 3, scope: !14) +!40 = !{!"llvm.loop.mustprogress"} +!41 = !DILocation(line: 8, column: 1, scope: !9) diff --git a/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir b/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir index d652da6088d5d..f43ae955bcd33 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir @@ -19,7 +19,7 @@ # CHECK: DBG_VALUE $rcx # CHECK-NEXT: $rdx = MOV64rr killed $rcx # CHECK-LABEL: bb.5: -# CHEKC-NOT: DBG_VALUE +# CHECK-NOT: DBG_VALUE --- | target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/DebugInfo/Generic/missing-abstract-variable.ll b/llvm/test/DebugInfo/X86/missing-abstract-variable.ll similarity index 94% rename from llvm/test/DebugInfo/Generic/missing-abstract-variable.ll rename to llvm/test/DebugInfo/X86/missing-abstract-variable.ll index 80e278e51194b..bf63cb41428a2 100644 --- a/llvm/test/DebugInfo/Generic/missing-abstract-variable.ll +++ b/llvm/test/DebugInfo/X86/missing-abstract-variable.ll @@ -1,10 +1,4 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck %s - -; The formal parameter 'b' for Function 'x' when inlined within 'a' is lost on -; powerpc64 (and on x86_64 at at least -O2). Presumably this is a SelectionDAG -; issue. -; FIXME: arm64 is an alias for aarch64 on macs, apparently? -; XFAIL: powerpc64, aarch64, arm64, hexagon, riscv, sparc, loongarch +; RUN: %llc_dwarf -mtriple x86_64-gnu-linux -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck %s ; Build from the following source with clang -O2. 
diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test index a09d72a76bef9..d34208136c3e2 100644 --- a/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test +++ b/llvm/test/Examples/OrcV2Examples/lljit-with-remote-debugging.test @@ -1,6 +1,8 @@ # This test makes sure that the example builds and executes as expected. # Instructions for debugging can be found in LLJITWithRemoteDebugging.cpp +# REQUIRES: default_triple + # RUN: LLJITWithRemoteDebugging %p/Inputs/argc_sub1_elf.ll | FileCheck --check-prefix=CHECK0 %s # CHECK0: Parsing input IR code from: {{.*}}/Inputs/argc_sub1_elf.ll # CHECK0: Running: main() diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test index f82ac41bce38d..21112b825ba5b 100644 --- a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test +++ b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test @@ -2,6 +2,8 @@ # RUN: opt -module-summary %p/Inputs/foo-mod.ll -o foo-mod.bc # RUN: opt -module-summary %p/Inputs/bar-mod.ll -o bar-mod.bc +# REQUIRES: default_triple + # RUN: llvm-lto -thinlto -o main-foo-bar main-mod.bc foo-mod.bc bar-mod.bc # RUN: LLJITWithThinLTOSummaries main-foo-bar.thinlto.bc 2>&1 | FileCheck %s diff --git a/llvm/test/ExecutionEngine/JITLink/X86/COFF_comdat_weak_plus_strong.s b/llvm/test/ExecutionEngine/JITLink/X86/COFF_comdat_weak_plus_strong.s index 30e0193b11a65..2754855e428e0 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/COFF_comdat_weak_plus_strong.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/COFF_comdat_weak_plus_strong.s @@ -1,22 +1,23 @@ # FIXME: Comdat any + ordinary strong symbol should generate duplicate section error # XFAIL: * +# # RUN: rm -rf %t && mkdir -p %t # RUN: yaml2obj %S/Inputs/COFF_comdat_weak_def.yaml -o %t/COFF_weak_1.o # RUN: yaml2obj %S/Inputs/COFF_strong_def.yaml -o 
%t/COFF_strong.o # RUN: llvm-mc -filetype=obj -triple=x86_64-windows-msvc %s -o %t/COFF_main.o -# RUN: +# # RUN: not llvm-jitlink -noexec %t/COFF_main.o %t/COFF_weak_1.o %t/COFF_strong.o \ -# RUN: -slab-allocate 100Kb -slab-address 0xfff00000 -slab-page-size 4096 \ -# RUN: -show-graph -noexec 2>&1 | FileCheck %s +# RUN: -slab-allocate 64Kb -slab-address 0xfff00000 \ +# RUN: -slab-page-size 4096 -show-graph 2>&1 | FileCheck %s # -# Check that a combination of comdat any definition and strong definition generate -# duplicate definition error. +# Check that a combination of comdat any definition and strong definition +# generate duplicate definition error. # # CHECK: section strongfunc: # CHECK-EMPTY: -# CHECK-NEXT: block 0xfff02000 size = 0x00000001, align = 16, alignment-offset = 0 +# CHECK-NEXT: block 0xfff0[[LO:[0-9a-f]+]] size = 0x00000001, align = 16, alignment-offset = 0 # CHECK-NEXT: symbols: -# CHECK-NEXT: 0xfff02000 (block + 0x00000000): size: 0x00000001, linkage: strong, scope: default, live - func +# CHECK-NEXT: 0xfff0[[LO]] (block + 0x00000000): size: 0x00000001, linkage: strong, scope: default, live - func # CHECK-NEXT: no edges .text @@ -28,5 +29,5 @@ .globl main .p2align 4, 0x90 main: - callq func - retq \ No newline at end of file + callq func + retq diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s b/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s new file mode 100644 index 0000000000000..adcd27613ea96 --- /dev/null +++ b/llvm/test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_GNU_IFUNC.s @@ -0,0 +1,110 @@ +# REQUIRES: x86_64-linux +# RUN: rm -rf %t && mkdir -p %t +# RUN: split-file %s %t +# RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/test_runner.o %t/test_runner.s +# RUN: llvm-mc -triple=x86_64-unknown-linux-gnu -filetype=obj -o %t/func_defs.o %t/func_defs.s +# RUN: llvm-rtdyld -triple=x86_64-unknown-linux-gnu -verify -check=%s %t/test_runner.o %t/func_defs.o +# RUN: llvm-rtdyld 
-triple=x86_64-unknown-linux-gnu -execute %t/test_runner.o %t/func_defs.o + +#--- test_runner.s + +# The _main function of this file contains calls to the two external functions +# "indirect_func" and "normal_func" that are not yet defined. They are called via +# the PLT to simulate how a compiler would emit a call to an external function. +# Eventually, indirect_func will resolve to a STT_GNU_IFUNC and normal_func to a +# regular function. We include calls to both types of functions in this test to +# test that both types of functions are executed correctly when their types are +# not known initially. +# It also contains a call to a locally defined indirect function. As RuntimeDyld +# treats local functions a bit differently than external functions, we also test +# that. +# Verify that the functions return the expected value. If the external indirect +# function call fails, this returns the error code 1. If the external normal +# function call fails, it's the error code 2. If the call to the locally +# defined indirect function fails, return the error code 3. + +local_real_func: + mov $0x56, %eax + ret + +local_indirect_func_resolver: + lea local_real_func(%rip), %rax + ret + + .type local_indirect_func, @gnu_indirect_function + .set local_indirect_func, local_indirect_func_resolver + + .global _main +_main: + call indirect_func@plt + cmp $0x12, %eax + je 1f + mov $1, %eax + ret +1: + + call normal_func@plt + cmp $0x34, %eax + je 1f + mov $2, %eax + ret +1: + + call local_indirect_func@plt + cmp $0x56, %eax + je 1f + mov $3, %eax + ret +1: + + xor %eax, %eax + ret + +# Test that the indirect functions have the same addresses in both calls.
+# rtdyld-check: decode_operand(test_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_indirect_func_address_2, 4) + next_pc(test_indirect_func_address_2) +test_indirect_func_address_1: + lea indirect_func(%rip), %rax + +test_indirect_func_address_2: + lea indirect_func(%rip), %rax + +# rtdyld-check: decode_operand(test_local_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_local_indirect_func_address_2, 4) + next_pc(test_indirect_func_address_2) +test_local_indirect_func_address_1: + lea local_indirect_func(%rip), %rax + +test_local_indirect_func_address_2: + lea local_indirect_func(%rip), %rax + +#--- func_defs.s + +# This file contains the external functions that are called above. The type of +# the indirect function is set to @gnu_indirect_function and its value is set +# to the value of ifunc_resolver. This is what gcc emits when using +# __attribute__((ifunc("ifunc_resolver"))) in C. The resolver function just +# returns the address of the real function "real_func". +# To test that everything works correctly, the indirect function returns 0x12 +# and the direct function returns 0x34. This is verified in the _main function +# above. + +real_func: + mov $0x12, %eax + ret + +ifunc_resolver: + lea real_func(%rip), %rax + ret + + .global indirect_func + .type indirect_func, @gnu_indirect_function + .set indirect_func, ifunc_resolver + + .global normal_func +normal_func: + mov $0x34, %eax + ret + +# Test that the address of the indirect function is equal even when it is +# defined in another object file.
+# rtdyld-check: decode_operand(test_indirect_func_address_1, 4) + next_pc(test_indirect_func_address_1) = decode_operand(test_indirect_func_address_3, 4) + next_pc(test_indirect_func_address_3) +test_indirect_func_address_3: + lea indirect_func(%rip), %rax diff --git a/llvm/test/Feature/OperandBundles/function-attrs.ll b/llvm/test/Feature/OperandBundles/function-attrs.ll index cfb67421ebb09..1db14cc7b5383 100644 --- a/llvm/test/Feature/OperandBundles/function-attrs.ll +++ b/llvm/test/Feature/OperandBundles/function-attrs.ll @@ -43,8 +43,8 @@ define void @test_3(i32* %x) { ret void } -; CHECK: attributes #0 = { nofree readonly } -; CHECK: attributes #1 = { nofree nosync readnone } -; CHECK: attributes #2 = { writeonly } +; CHECK: attributes #0 = { nofree memory(read) } +; CHECK: attributes #1 = { nofree nosync memory(none) } +; CHECK: attributes #2 = { memory(write) } ; CHECK: attributes #3 = { nofree } ; CHECK: attributes #4 = { nofree nosync } diff --git a/llvm/test/Feature/intrinsics.ll b/llvm/test/Feature/intrinsics.ll index e7078c4a952ba..bd2c469395ae3 100644 --- a/llvm/test/Feature/intrinsics.ll +++ b/llvm/test/Feature/intrinsics.ll @@ -69,5 +69,5 @@ define void @trap() { ret void } -; CHECK: attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #1 = { cold noreturn nounwind } diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll index 068c079151dda..47df1d43549ce 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll @@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu" ; Check that globals were instrumented: -; CHECK: @global = global { i32, [28 x i8] } zeroinitializer, align 32 -; CHECK: @.str = internal constant { [14 x i8], 
[18 x i8] } { [14 x i8] c"Hello, world!\00", [18 x i8] zeroinitializer }, align 32 +; CHECK: @global = global { i32, [28 x i8] } zeroinitializer, comdat, align 32 +; CHECK: @.str = internal constant { [14 x i8], [18 x i8] } { [14 x i8] c"Hello, world!\00", [18 x i8] zeroinitializer }, comdat({{.*}}), align 32 ; Check emitted location descriptions: ; CHECK: [[VARNAME:@___asan_gen_.[0-9]+]] = private unnamed_addr constant [7 x i8] c"global\00", align 1 diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_with_comdat.ll b/llvm/test/Instrumentation/AddressSanitizer/global_with_comdat.ll index 5831b81370680..47bb1f102e2fc 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_with_comdat.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_with_comdat.ll @@ -5,9 +5,9 @@ ; enabled as indicator symbols will cause link time odr violations. ; This is to fix PR 47925. ; -; RUN: opt < %s -passes=asan -asan-globals-live-support=1 -S | FileCheck %s --check-prefixes=CHECK,NOCOMDAT +; RUN: opt < %s -passes=asan -asan-globals-live-support=1 -asan-use-odr-indicator=0 -S | FileCheck %s --check-prefixes=CHECK,NOCOMDAT ; Check that enabling odr indicators enables comdat for globals. 
-; RUN: opt < %s -passes=asan -asan-globals-live-support=1 -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK,COMDAT +; RUN: opt < %s -passes=asan -asan-globals-live-support=1 -S | FileCheck %s --check-prefixes=CHECK,COMDAT target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -43,8 +43,8 @@ target triple = "x86_64-unknown-linux-gnu" ; Check emitted location descriptions: ; CHECK: [[VARNAME:@___asan_gen_.[0-9]+]] = private unnamed_addr constant [7 x i8] c"global\00", align 1 -; COMDAT: @__asan_global_global = {{.*}}i64 ptrtoint (ptr @global to i64){{.*}} section "asan_globals"{{.*}}, !associated -; COMDAT: @__asan_global_.str = {{.*}}i64 ptrtoint (ptr @{{.str|1}} to i64){{.*}} section "asan_globals"{{.*}}, !associated +; COMDAT: @__asan_global_global = {{.*}}i64 ptrtoint (ptr @__odr_asan_gen_global to i64){{.*}} section "asan_globals"{{.*}}, comdat($global), !associated +; COMDAT: @__asan_global_.str = {{.*}}i64 ptrtoint (ptr @___asan_gen_ to i64){{.*}} section "asan_globals"{{.*}}, comdat($.str.{{.*}}), !associated ; The metadata has to be inserted to llvm.compiler.used to avoid being stripped ; during LTO. 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll b/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll index 88525ae98a6a3..b772d5c76167c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll @@ -1,5 +1,5 @@ ; Defaults -; RUN: opt < %s -passes=asan -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -passes=asan -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-INDICATOR ; {newPM,legacyPM} x {alias0,alias1} x {odr0,odr1} ; RUN: opt < %s -passes=asan -asan-use-private-alias=0 -asan-use-odr-indicator=0 -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR diff --git a/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll b/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll index e5dbb6b17b750..facf6eca2d6a4 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -opaque-pointers -passes=asan -asan-use-private-alias=0 -S | FileCheck %s --check-prefix=NOALIAS -; RUN: opt < %s -opaque-pointers -passes=asan -asan-use-private-alias=1 -S | FileCheck %s --check-prefix=ALIAS +; RUN: opt < %s -opaque-pointers -passes=asan -asan-use-odr-indicator=0 -asan-use-private-alias=0 -S | FileCheck %s --check-prefix=NOALIAS +; RUN: opt < %s -opaque-pointers -passes=asan -asan-use-odr-indicator=0 -asan-use-private-alias=1 -S | FileCheck %s --check-prefix=ALIAS target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll b/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll index 14c2aa891e958..6d1eec29796c5 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll @@ -1,4 
+1,4 @@ -; RUN: opt < %s -passes=asan -S | FileCheck %s +; RUN: opt < %s -passes=asan -asan-use-odr-indicator=0 -asan-use-private-alias=0 -S | FileCheck %s ; Generated like so: ; $ clang -S -emit-llvm -Xclang -disable-llvm-passes -fsanitize=address -O1 t.cpp -o t.ll diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll index 0c065d6c29b0e..5cfc1f16af8fe 100644 --- a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll @@ -35,10 +35,10 @@ define void @store(i8* %p) { ; CHECK: declare void @__dfsan_mem_transfer_callback(i[[#SBITS]]*, i64) ; CHECK: declare void @__dfsan_cmp_callback(i[[#SBITS]]) -; CHECK: ; Function Attrs: nounwind readonly +; CHECK: ; Function Attrs: nounwind memory(read) ; CHECK-NEXT: declare zeroext i[[#SBITS]] @__dfsan_union_load(i[[#SBITS]]*, i64) -; CHECK: ; Function Attrs: nounwind readonly +; CHECK: ; Function Attrs: nounwind memory(read) ; CHECK-NEXT: declare zeroext i64 @__dfsan_load_label_and_origin(i8*, i64) ; CHECK: declare void @__dfsan_unimplemented(i8*) diff --git a/llvm/test/Instrumentation/MemorySanitizer/attributes.ll b/llvm/test/Instrumentation/MemorySanitizer/attributes.ll index 43452f570b28c..c2825ab3fc630 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/attributes.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/attributes.ll @@ -44,11 +44,8 @@ entry: ret void } -; CHECK-NOT: readnone -; CHECK-NOT: readonly -; CHECK-NOT: writeonly -; CHECK-NOT: argmemonly +; CHECK-NOT: memory( ; CHECK-NOT: speculatable -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) ; CHECK-NEXT: declare void @llvm.donothing diff --git a/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll b/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll index d3730f4e9bcda..844c043f9588f 100644 --- 
a/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll +++ b/llvm/test/LTO/Resolution/X86/comdat-mixed-lto.ll @@ -1,23 +1,36 @@ ; Test of comdat handling with mixed thinlto and regular lto compilation. ; This module is compiled with ThinLTO -; RUN: opt -module-summary -o %t1.o %s +; RUN: opt -opaque-pointers -module-summary -o %t1.o %s ; Input module compiled for regular LTO -; RUN: opt -o %t2.o %p/Inputs/comdat-mixed-lto.ll +; RUN: opt -opaque-pointers -o %t2.o %p/Inputs/comdat-mixed-lto.ll ; The copy of C from this module is prevailing. The copy of C from the ; regular LTO module is not prevailing, and will be dropped to ; available_externally. -; RUN: llvm-lto2 run -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t2.o,testglobfunc,lxp -r=%t1.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps +; RUN: llvm-lto2 run -opaque-pointers -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lxp -r=%t2.o,testglobfunc,lx -o %t3 %t1.o %t2.o -save-temps ; The Input module (regular LTO) is %t3.0. Check to make sure that we removed ; __cxx_global_var_init and testglobfunc from comdat. Also check to ensure ; that testglobfunc was dropped to available_externally. Otherwise we would ; have linker multiply defined errors as it is no longer in a comdat and ; would clash with the copy from this module. 
-; RUN: llvm-dis %t3.0.0.preopt.bc -o - | FileCheck %s -; CHECK: define internal void @__cxx_global_var_init() section ".text.startup" { -; CHECK: define available_externally dso_local void @testglobfunc() section ".text.startup" { +; RUN: llvm-dis -opaque-pointers %t3.0.0.preopt.bc -o - | FileCheck %s + +; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @C }] +; CHECK: @C = available_externally dso_local global %"class.Test::ptr" zeroinitializer, align 4 +; CHECK-NOT: declare +; CHECK: declare dso_local void @__cxx_global_var_init() section ".text.startup" +; CHECK-NOT: declare + +; Check the behavior with the prevailing testglobfunc in %t2.o. +; RUN: llvm-lto2 run -opaque-pointers -r=%t1.o,C,pl -r=%t2.o,C,l -r=%t1.o,testglobfunc,lx -r=%t2.o,testglobfunc,plx -o %t4 %t1.o %t2.o -save-temps +; RUN: llvm-dis -opaque-pointers %t4.0.0.preopt.bc -o - | FileCheck %s --check-prefix=CHECK2 + +; CHECK2: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__cxx_global_var_init, ptr @C }] +; CHECK2: @C = available_externally dso_local global %"class.Test::ptr" zeroinitializer, align 4 +; CHECK2: declare dso_local void @__cxx_global_var_init() section ".text.startup" +; CHECK2: define available_externally dso_local void @testglobfunc() section ".text.startup" { ; ModuleID = 'comdat-mixed-lto.o' source_filename = "comdat-mixed-lto.cpp" diff --git a/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s b/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s new file mode 100644 index 0000000000000..06f0447414e66 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frinta {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: 
invalid operand for instruction +// CHECK-NEXT: frinta {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frinta {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frinta {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frinta {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frinta {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frinta.s b/llvm/test/MC/AArch64/SME2/frinta.s new file mode 100644 index 0000000000000..3970a0e0e2bff --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frinta.s @@ -0,0 +1,62 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frinta {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101100-11100000-00000000 +// CHECK-INST: frinta { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1ace000 + +frinta {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101100-11100001-01010100 +// CHECK-INST: frinta { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace154 + +frinta {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101100-11100001-10010110 +// CHECK-INST: frinta { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace196 + +frinta {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101100-11100011-11011110 +// CHECK-INST: frinta { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace3de + + +frinta {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111100-11100000-00000000 +// CHECK-INST: frinta { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce000 + +frinta {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111100-11100001-00010100 +// CHECK-INST: frinta { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce114 + +frinta {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111100-11100001-10010100 +// CHECK-INST: frinta { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce194 + +frinta {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-11100011-10011100 +// CHECK-INST: frinta { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce39c diff --git a/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s new file mode 100644 index 0000000000000..7deb91aef15a9 --- /dev/null +++ 
b/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintm {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintm {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintm {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintm {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintm {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintm {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintm.s b/llvm/test/MC/AArch64/SME2/frintm.s new file mode 100644 index 0000000000000..ff294dfef3faa --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintm.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 
-mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintm {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101010-11100000-00000000 +// CHECK-INST: frintm { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae000 + +frintm {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101010-11100001-01010100 +// CHECK-INST: frintm { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae154 + +frintm {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101010-11100001-10010110 +// CHECK-INST: frintm { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae196 + +frintm {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101010-11100011-11011110 +// CHECK-INST: frintm { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae3de + + +frintm {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111010-11100000-00000000 +// CHECK-INST: frintm { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae000 + +frintm {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111010-11100001-00010100 +// CHECK-INST: frintm { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae114 + +frintm {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111010-11100001-10010100 +// CHECK-INST: frintm { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae194 + +frintm {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111010-11100011-10011100 +// CHECK-INST: frintm { z28.s - 
z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae39c + diff --git a/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s new file mode 100644 index 0000000000000..2fe00351df8f0 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintn {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintn {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintn {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintn {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintn {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintn {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintn.s b/llvm/test/MC/AArch64/SME2/frintn.s new file mode 100644 index 0000000000000..2d85b4ca878c6 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintn.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s 
--check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintn {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101000-11100000-00000000 +// CHECK-INST: frintn { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e000 + +frintn {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101000-11100001-01010100 +// CHECK-INST: frintn { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e154 + +frintn {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101000-11100001-10010110 +// CHECK-INST: frintn { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e196 + +frintn {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101000-11100011-11011110 +// CHECK-INST: frintn { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e3de + + +frintn {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111000-11100000-00000000 +// CHECK-INST: frintn { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e000 + +frintn {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111000-11100001-00010100 +// CHECK-INST: frintn { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e114 + +frintn {z20.s - z23.s}, {z12.s - 
z15.s} // 11000001-10111000-11100001-10010100 +// CHECK-INST: frintn { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e194 + +frintn {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111000-11100011-10011100 +// CHECK-INST: frintn { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e39c + diff --git a/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s new file mode 100644 index 0000000000000..7013df058690c --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintp {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintp {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintp {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintp {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintp {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintp {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintp.s b/llvm/test/MC/AArch64/SME2/frintp.s new file mode 100644 index 0000000000000..600f26063ca5e --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintp.s @@ -0,0 +1,62 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintp {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101001-11100000-00000000 +// CHECK-INST: frintp { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e000 + +frintp {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101001-11100001-01010100 +// CHECK-INST: frintp { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e154 + +frintp {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101001-11100001-10010110 +// CHECK-INST: frintp { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e196 + +frintp {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101001-11100011-11011110 +// CHECK-INST: frintp { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e3de + + +frintp {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111001-11100000-00000000 +// CHECK-INST: frintp { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb9,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c1b9e000 + +frintp {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111001-11100001-00010100 +// CHECK-INST: frintp { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e114 + +frintp {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111001-11100001-10010100 +// CHECK-INST: frintp { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e194 + +frintp {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111001-11100011-10011100 +// CHECK-INST: frintp { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e39c diff --git a/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s b/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s new file mode 100644 index 0000000000000..29c19316529fc --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +ldr zt1, [x0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid lookup table, expected zt0 +// CHECK-NEXT: ldr zt1, [x0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/ldr.s b/llvm/test/MC/AArch64/SME2/ldr.s new file mode 100644 index 0000000000000..4038b39ad3f92 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldr.s @@ -0,0 +1,38 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d 
--mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ldr zt0, [x0] // 11100001-00011111-10000000-00000000 +// CHECK-INST: ldr zt0, [x0] +// CHECK-ENCODING: [0x00,0x80,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f8000 + +ldr zt0, [x10] // 11100001-00011111-10000001-01000000 +// CHECK-INST: ldr zt0, [x10] +// CHECK-ENCODING: [0x40,0x81,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f8140 + +ldr zt0, [x13] // 11100001-00011111-10000001-10100000 +// CHECK-INST: ldr zt0, [x13] +// CHECK-ENCODING: [0xa0,0x81,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f81a0 + +ldr zt0, [sp] // 11100001-00011111-10000011-11100000 +// CHECK-INST: ldr zt0, [sp] +// CHECK-ENCODING: [0xe0,0x83,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f83e0 + diff --git a/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s b/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s new file mode 100644 index 0000000000000..730f3e7532335 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s @@ -0,0 +1,60 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid lane indices + +luti2 z0.h, zt0, z0[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: luti2 z0.h, zt0, z0[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 z0.s, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. 
+// CHECK-NEXT: luti2 z0.s, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.b-z1.b}, zt0, z0[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti2 {z0.b-z1.b}, zt0, z0[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.h-z1.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti2 {z0.h-z1.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.s-z3.s}, zt0, z0[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: luti2 {z0.s-z3.s}, zt0, z0[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.b-z3.b}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: luti2 {z0.b-z3.b}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lists + +luti2 {z0.h-z2.h}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti2 {z0.h-z2.h}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z1.s-z2.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: luti2 {z1.s-z2.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z1.s-z4.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: luti2 {z1.s-z4.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +luti2 {z0.d-z1.d}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction +// CHECK-NEXT: luti2 {z0.d-z1.d}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/luti2.s b/llvm/test/MC/AArch64/SME2/luti2.s new file mode 100644 index 0000000000000..c622ed0a4285c --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti2.s @@ -0,0 +1,238 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +luti2 z0.h, zt0, z0[0] // 11000000-11001100-00010000-00000000 +// CHECK-INST: luti2 z0.h, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x10,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc1000 + +luti2 z21.h, zt0, z10[5] // 11000000-11001101-01010001-01010101 +// CHECK-INST: luti2 z21.h, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x51,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd5155 + +luti2 z23.h, zt0, z13[3] // 11000000-11001100-11010001-10110111 +// CHECK-INST: luti2 z23.h, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xd1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ccd1b7 + +luti2 z31.h, zt0, z31[15] // 11000000-11001111-11010011-11111111 +// CHECK-INST: luti2 z31.h, zt0, z31[15] +// CHECK-ENCODING: [0xff,0xd3,0xcf,0xc0] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c0cfd3ff + + +luti2 z0.s, zt0, z0[0] // 11000000-11001100-00100000-00000000 +// CHECK-INST: luti2 z0.s, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x20,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc2000 + +luti2 z21.s, zt0, z10[5] // 11000000-11001101-01100001-01010101 +// CHECK-INST: luti2 z21.s, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x61,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd6155 + +luti2 z23.s, zt0, z13[3] // 11000000-11001100-11100001-10110111 +// CHECK-INST: luti2 z23.s, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xe1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cce1b7 + +luti2 z31.s, zt0, z31[15] // 11000000-11001111-11100011-11111111 +// CHECK-INST: luti2 z31.s, zt0, z31[15] +// CHECK-ENCODING: [0xff,0xe3,0xcf,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cfe3ff + + +luti2 z0.b, zt0, z0[0] // 11000000-11001100-00000000-00000000 +// CHECK-INST: luti2 z0.b, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x00,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc0000 + +luti2 z21.b, zt0, z10[5] // 11000000-11001101-01000001-01010101 +// CHECK-INST: luti2 z21.b, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x41,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd4155 + +luti2 z23.b, zt0, z13[3] // 11000000-11001100-11000001-10110111 +// CHECK-INST: luti2 z23.b, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xc1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ccc1b7 + +luti2 z31.b, zt0, z31[15] // 11000000-11001111-11000011-11111111 +// CHECK-INST: luti2 z31.b, zt0, z31[15] +// CHECK-ENCODING: [0xff,0xc3,0xcf,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cfc3ff + + +luti2 {z0.h - z1.h}, zt0, z0[0] // 11000000-10001100-01010000-00000000 +// CHECK-INST: luti2 { z0.h, z1.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x50,0x8c,0xc0] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c5000 + +luti2 {z20.h - z21.h}, zt0, z10[2] // 11000000-10001101-01010001-01010100 +// CHECK-INST: luti2 { z20.h, z21.h }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x51,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d5154 + +luti2 {z22.h - z23.h}, zt0, z13[1] // 11000000-10001100-11010001-10110110 +// CHECK-INST: luti2 { z22.h, z23.h }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xd1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08cd1b6 + +luti2 {z30.h - z31.h}, zt0, z31[7] // 11000000-10001111-11010011-11111110 +// CHECK-INST: luti2 { z30.h, z31.h }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xd3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fd3fe + + +luti2 {z0.s - z1.s}, zt0, z0[0] // 11000000-10001100-01100000-00000000 +// CHECK-INST: luti2 { z0.s, z1.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x60,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c6000 + +luti2 {z20.s - z21.s}, zt0, z10[2] // 11000000-10001101-01100001-01010100 +// CHECK-INST: luti2 { z20.s, z21.s }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x61,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d6154 + +luti2 {z22.s - z23.s}, zt0, z13[1] // 11000000-10001100-11100001-10110110 +// CHECK-INST: luti2 { z22.s, z23.s }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xe1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ce1b6 + +luti2 {z30.s - z31.s}, zt0, z31[7] // 11000000-10001111-11100011-11111110 +// CHECK-INST: luti2 { z30.s, z31.s }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xe3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fe3fe + + +luti2 {z0.b - z1.b}, zt0, z0[0] // 11000000-10001100-01000000-00000000 +// CHECK-INST: luti2 { z0.b, z1.b }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x40,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c08c4000 + +luti2 {z20.b - z21.b}, zt0, z10[2] // 11000000-10001101-01000001-01010100 +// CHECK-INST: luti2 { z20.b, z21.b }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x41,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d4154 + +luti2 {z22.b - z23.b}, zt0, z13[1] // 11000000-10001100-11000001-10110110 +// CHECK-INST: luti2 { z22.b, z23.b }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xc1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08cc1b6 + +luti2 {z30.b - z31.b}, zt0, z31[7] // 11000000-10001111-11000011-11111110 +// CHECK-INST: luti2 { z30.b, z31.b }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xc3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fc3fe + + +luti2 {z0.h - z3.h}, zt0, z0[0] // 11000000-10001100-10010000-00000000 +// CHECK-INST: luti2 { z0.h - z3.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x90,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c9000 + +luti2 {z20.h - z23.h}, zt0, z10[1] // 11000000-10001101-10010001-01010100 +// CHECK-INST: luti2 { z20.h - z23.h }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x91,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d9154 + +luti2 {z20.h - z23.h}, zt0, z13[0] // 11000000-10001100-10010001-10110100 +// CHECK-INST: luti2 { z20.h - z23.h }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x91,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c91b4 + +luti2 {z28.h - z31.h}, zt0, z31[3] // 11000000-10001111-10010011-11111100 +// CHECK-INST: luti2 { z28.h - z31.h }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0x93,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08f93fc + + +luti2 {z0.s - z3.s}, zt0, z0[0] // 11000000-10001100-10100000-00000000 +// CHECK-INST: luti2 { z0.s - z3.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0xa0,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ca000 + +luti2 {z20.s - z23.s}, zt0, z10[1] // 
11000000-10001101-10100001-01010100 +// CHECK-INST: luti2 { z20.s - z23.s }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0xa1,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08da154 + +luti2 {z20.s - z23.s}, zt0, z13[0] // 11000000-10001100-10100001-10110100 +// CHECK-INST: luti2 { z20.s - z23.s }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0xa1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ca1b4 + +luti2 {z28.s - z31.s}, zt0, z31[3] // 11000000-10001111-10100011-11111100 +// CHECK-INST: luti2 { z28.s - z31.s }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0xa3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fa3fc + + +luti2 {z0.b - z3.b}, zt0, z0[0] // 11000000-10001100-10000000-00000000 +// CHECK-INST: luti2 { z0.b - z3.b }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x80,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c8000 + +luti2 {z20.b - z23.b}, zt0, z10[1] // 11000000-10001101-10000001-01010100 +// CHECK-INST: luti2 { z20.b - z23.b }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x81,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d8154 + +luti2 {z20.b - z23.b}, zt0, z13[0] // 11000000-10001100-10000001-10110100 +// CHECK-INST: luti2 { z20.b - z23.b }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x81,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c81b4 + +luti2 {z28.b - z31.b}, zt0, z31[3] // 11000000-10001111-10000011-11111100 +// CHECK-INST: luti2 { z28.b - z31.b }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0x83,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08f83fc + diff --git a/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s b/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s new file mode 100644 index 0000000000000..7a8590d0bac29 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s @@ -0,0 +1,60 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck 
%s + +// --------------------------------------------------------------------------// +// Invalid lane indices + +luti4 z0.h, zt0, z0[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti4 z0.h, zt0, z0[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 z0.s, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti4 z0.s, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.b-z1.b}, zt0, z0[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: luti4 {z0.b-z1.b}, zt0, z0[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.h-z1.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: luti4 {z0.h-z1.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.s-z3.s}, zt0, z0[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. +// CHECK-NEXT: luti4 {z0.s-z3.s}, zt0, z0[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.h-z3.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. 
+// CHECK-NEXT: luti4 {z0.h-z3.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lists + +luti4 {z0.h-z2.h}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti4 {z0.h-z2.h}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z1.s-z2.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: luti4 {z1.s-z2.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z1.s-z4.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: luti4 {z1.s-z4.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +luti4 {z0.d-z1.d}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti4 {z0.d-z1.d}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/luti4.s b/llvm/test/MC/AArch64/SME2/luti4.s new file mode 100644 index 0000000000000..c784d823eba45 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti4.s @@ -0,0 +1,213 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d 
--mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +luti4 z0.h, zt0, z0[0] // 11000000-11001010-00010000-00000000 +// CHECK-INST: luti4 z0.h, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x10,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca1000 + +luti4 z21.h, zt0, z10[5] // 11000000-11001011-01010001-01010101 +// CHECK-INST: luti4 z21.h, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x51,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cb5155 + +luti4 z23.h, zt0, z13[3] // 11000000-11001010-11010001-10110111 +// CHECK-INST: luti4 z23.h, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xd1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cad1b7 + +luti4 z31.h, zt0, z31[7] // 11000000-11001011-11010011-11111111 +// CHECK-INST: luti4 z31.h, zt0, z31[7] +// CHECK-ENCODING: [0xff,0xd3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbd3ff + + +luti4 z0.s, zt0, z0[0] // 11000000-11001010-00100000-00000000 +// CHECK-INST: luti4 z0.s, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x20,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca2000 + +luti4 z21.s, zt0, z10[5] // 11000000-11001011-01100001-01010101 +// CHECK-INST: luti4 z21.s, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x61,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cb6155 + +luti4 z23.s, zt0, z13[3] // 11000000-11001010-11100001-10110111 +// CHECK-INST: luti4 z23.s, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xe1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cae1b7 + +luti4 z31.s, zt0, z31[7] // 11000000-11001011-11100011-11111111 +// CHECK-INST: luti4 z31.s, zt0, z31[7] +// 
CHECK-ENCODING: [0xff,0xe3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbe3ff + + +luti4 z0.b, zt0, z0[0] // 11000000-11001010-00000000-00000000 +// CHECK-INST: luti4 z0.b, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x00,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca0000 + +luti4 z21.b, zt0, z10[5] // 11000000-11001011-01000001-01010101 +// CHECK-INST: luti4 z21.b, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x41,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cb4155 + +luti4 z23.b, zt0, z13[3] // 11000000-11001010-11000001-10110111 +// CHECK-INST: luti4 z23.b, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xc1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cac1b7 + +luti4 z31.b, zt0, z31[7] // 11000000-11001011-11000011-11111111 +// CHECK-INST: luti4 z31.b, zt0, z31[7] +// CHECK-ENCODING: [0xff,0xc3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbc3ff + + +luti4 {z0.h - z1.h}, zt0, z0[0] // 11000000-10001010-01010000-00000000 +// CHECK-INST: luti4 { z0.h, z1.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x50,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a5000 + +luti4 {z20.h - z21.h}, zt0, z10[2] // 11000000-10001011-01010001-01010100 +// CHECK-INST: luti4 { z20.h, z21.h }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x51,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b5154 + +luti4 {z22.h - z23.h}, zt0, z13[1] // 11000000-10001010-11010001-10110110 +// CHECK-INST: luti4 { z22.h, z23.h }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xd1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ad1b6 + +luti4 {z30.h - z31.h}, zt0, z31[3] // 11000000-10001011-11010011-11111110 +// CHECK-INST: luti4 { z30.h, z31.h }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xd3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08bd3fe + + +luti4 {z0.s - z1.s}, zt0, 
z0[0] // 11000000-10001010-01100000-00000000 +// CHECK-INST: luti4 { z0.s, z1.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x60,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a6000 + +luti4 {z20.s - z21.s}, zt0, z10[2] // 11000000-10001011-01100001-01010100 +// CHECK-INST: luti4 { z20.s, z21.s }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x61,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b6154 + +luti4 {z22.s - z23.s}, zt0, z13[1] // 11000000-10001010-11100001-10110110 +// CHECK-INST: luti4 { z22.s, z23.s }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xe1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ae1b6 + +luti4 {z30.s - z31.s}, zt0, z31[3] // 11000000-10001011-11100011-11111110 +// CHECK-INST: luti4 { z30.s, z31.s }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xe3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08be3fe + + +luti4 {z0.b - z1.b}, zt0, z0[0] // 11000000-10001010-01000000-00000000 +// CHECK-INST: luti4 { z0.b, z1.b }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x40,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a4000 + +luti4 {z20.b - z21.b}, zt0, z10[2] // 11000000-10001011-01000001-01010100 +// CHECK-INST: luti4 { z20.b, z21.b }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x41,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b4154 + +luti4 {z22.b - z23.b}, zt0, z13[1] // 11000000-10001010-11000001-10110110 +// CHECK-INST: luti4 { z22.b, z23.b }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xc1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ac1b6 + +luti4 {z30.b - z31.b}, zt0, z31[3] // 11000000-10001011-11000011-11111110 +// CHECK-INST: luti4 { z30.b, z31.b }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xc3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08bc3fe + + +luti4 {z0.h - z3.h}, zt0, z0[0] // 11000000-10001010-10010000-00000000 +// CHECK-INST: 
luti4 { z0.h - z3.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x90,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a9000 + +luti4 {z20.h - z23.h}, zt0, z10[1] // 11000000-10001011-10010001-01010100 +// CHECK-INST: luti4 { z20.h - z23.h }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x91,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b9154 + +luti4 {z20.h - z23.h}, zt0, z13[0] // 11000000-10001010-10010001-10110100 +// CHECK-INST: luti4 { z20.h - z23.h }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x91,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a91b4 + +luti4 {z28.h - z31.h}, zt0, z31[1] // 11000000-10001011-10010011-11111100 +// CHECK-INST: luti4 { z28.h - z31.h }, zt0, z31[1] +// CHECK-ENCODING: [0xfc,0x93,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b93fc + + +luti4 {z0.s - z3.s}, zt0, z0[0] // 11000000-10001010-10100000-00000000 +// CHECK-INST: luti4 { z0.s - z3.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0xa0,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08aa000 + +luti4 {z20.s - z23.s}, zt0, z10[1] // 11000000-10001011-10100001-01010100 +// CHECK-INST: luti4 { z20.s - z23.s }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0xa1,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ba154 + +luti4 {z20.s - z23.s}, zt0, z13[0] // 11000000-10001010-10100001-10110100 +// CHECK-INST: luti4 { z20.s - z23.s }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0xa1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08aa1b4 + +luti4 {z28.s - z31.s}, zt0, z31[1] // 11000000-10001011-10100011-11111100 +// CHECK-INST: luti4 { z28.s - z31.s }, zt0, z31[1] +// CHECK-ENCODING: [0xfc,0xa3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ba3fc + diff --git a/llvm/test/MC/AArch64/SME2/movt-diagnostics.s b/llvm/test/MC/AArch64/SME2/movt-diagnostics.s new file mode 100644 index 
0000000000000..d3696bf114623 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/movt-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +// index must be a multiple of 8 in range [0, 56]. +// --------------------------------------------------------------------------// + +movt x0, zt0[57] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[57] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[58] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[58] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[64] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[64] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[72] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[72] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid zt0 register + +movt x0, zt1[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unexpected token in argument list +// CHECK-NEXT: movt x0, zt1[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/movt.s b/llvm/test/MC/AArch64/SME2/movt.s new file mode 100644 index 0000000000000..a673eceeebed4 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/movt.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj 
-mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movt x0, zt0[0] // 11000000-01001100-00000011-11100000 +// CHECK-INST: movt x0, zt0[0] +// CHECK-ENCODING: [0xe0,0x03,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c03e0 + +movt x21, zt0[40] // 11000000-01001100-01010011-11110101 +// CHECK-INST: movt x21, zt0[40] +// CHECK-ENCODING: [0xf5,0x53,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c53f5 + +movt x23, zt0[48] // 11000000-01001100-01100011-11110111 +// CHECK-INST: movt x23, zt0[48] +// CHECK-ENCODING: [0xf7,0x63,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c63f7 + +movt xzr, zt0[56] // 11000000-01001100-01110011-11111111 +// CHECK-INST: movt xzr, zt0[56] +// CHECK-ENCODING: [0xff,0x73,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c73ff + + +movt zt0[0], x0 // 11000000-01001110-00000011-11100000 +// CHECK-INST: movt zt0[0], x0 +// CHECK-ENCODING: [0xe0,0x03,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e03e0 + +movt zt0[40], x21 // 11000000-01001110-01010011-11110101 +// CHECK-INST: movt zt0[40], x21 +// CHECK-ENCODING: [0xf5,0x53,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e53f5 + +movt zt0[48], x23 // 11000000-01001110-01100011-11110111 +// CHECK-INST: movt zt0[48], x23 +// CHECK-ENCODING: [0xf7,0x63,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e63f7 + +movt zt0[56], xzr // 11000000-01001110-01110011-11111111 +// CHECK-INST: movt zt0[56], xzr +// CHECK-ENCODING: [0xff,0x73,0x4e,0xc0] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c04e73ff + diff --git a/llvm/test/MC/AArch64/SME2/str-diagnostics.s b/llvm/test/MC/AArch64/SME2/str-diagnostics.s new file mode 100644 index 0000000000000..00659829fe616 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/str-diagnostics.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +str zt, [x0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid lookup table, expected zt0 +// CHECK-NEXT: str zt, [x0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/str.s b/llvm/test/MC/AArch64/SME2/str.s new file mode 100644 index 0000000000000..97abc4ce01cd6 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/str.s @@ -0,0 +1,38 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +str zt0, [x0] // 11100001-00111111-10000000-00000000 +// CHECK-INST: str zt0, [x0] +// CHECK-ENCODING: [0x00,0x80,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f8000 + +str zt0, [x10] // 11100001-00111111-10000001-01000000 +// CHECK-INST: str zt0, [x10] +// CHECK-ENCODING: [0x40,0x81,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f8140 + +str zt0, [x13] // 
11100001-00111111-10000001-10100000 +// CHECK-INST: str zt0, [x13] +// CHECK-ENCODING: [0xa0,0x81,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f81a0 + +str zt0, [sp] // 11100001-00111111-10000011-11100000 +// CHECK-INST: str zt0, [sp] +// CHECK-ENCODING: [0xe0,0x83,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f83e0 + diff --git a/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s b/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s new file mode 100644 index 0000000000000..4e7ea5c66114c --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sunpk {z0.h-z2.h}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.h-z2.h}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z1.s-z2.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: sunpk {z1.s-z2.s}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git 
a/llvm/test/MC/AArch64/SME2/sunpk.s b/llvm/test/MC/AArch64/SME2/sunpk.s new file mode 100644 index 0000000000000..86543aa4b4f22 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sunpk.s @@ -0,0 +1,163 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sunpk {z0.h - z1.h}, z0.b // 11000001-01100101-11100000-00000000 +// CHECK-INST: sunpk { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x00,0xe0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e000 + +sunpk {z20.h - z21.h}, z10.b // 11000001-01100101-11100001-01010100 +// CHECK-INST: sunpk { z20.h, z21.h }, z10.b +// CHECK-ENCODING: [0x54,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e154 + +sunpk {z22.h - z23.h}, z13.b // 11000001-01100101-11100001-10110110 +// CHECK-INST: sunpk { z22.h, z23.h }, z13.b +// CHECK-ENCODING: [0xb6,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e1b6 + +sunpk {z30.h - z31.h}, z31.b // 11000001-01100101-11100011-11111110 +// CHECK-INST: sunpk { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xfe,0xe3,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e3fe + + +sunpk {z0.s - z1.s}, z0.h // 
11000001-10100101-11100000-00000000 +// CHECK-INST: sunpk { z0.s, z1.s }, z0.h +// CHECK-ENCODING: [0x00,0xe0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e000 + +sunpk {z20.s - z21.s}, z10.h // 11000001-10100101-11100001-01010100 +// CHECK-INST: sunpk { z20.s, z21.s }, z10.h +// CHECK-ENCODING: [0x54,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e154 + +sunpk {z22.s - z23.s}, z13.h // 11000001-10100101-11100001-10110110 +// CHECK-INST: sunpk { z22.s, z23.s }, z13.h +// CHECK-ENCODING: [0xb6,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e1b6 + +sunpk {z30.s - z31.s}, z31.h // 11000001-10100101-11100011-11111110 +// CHECK-INST: sunpk { z30.s, z31.s }, z31.h +// CHECK-ENCODING: [0xfe,0xe3,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e3fe + + +sunpk {z0.d - z1.d}, z0.s // 11000001-11100101-11100000-00000000 +// CHECK-INST: sunpk { z0.d, z1.d }, z0.s +// CHECK-ENCODING: [0x00,0xe0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e000 + +sunpk {z20.d - z21.d}, z10.s // 11000001-11100101-11100001-01010100 +// CHECK-INST: sunpk { z20.d, z21.d }, z10.s +// CHECK-ENCODING: [0x54,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e154 + +sunpk {z22.d - z23.d}, z13.s // 11000001-11100101-11100001-10110110 +// CHECK-INST: sunpk { z22.d, z23.d }, z13.s +// CHECK-ENCODING: [0xb6,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e1b6 + +sunpk {z30.d - z31.d}, z31.s // 11000001-11100101-11100011-11111110 +// CHECK-INST: sunpk { z30.d, z31.d }, z31.s +// CHECK-ENCODING: [0xfe,0xe3,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e3fe + + +sunpk {z0.h - z3.h}, {z0.b - z1.b} // 11000001-01110101-11100000-00000000 +// CHECK-INST: sunpk { z0.h - z3.h }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0xe0,0x75,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c175e000 + +sunpk {z20.h - z23.h}, {z10.b - z11.b} // 11000001-01110101-11100001-01010100 +// CHECK-INST: sunpk { z20.h - z23.h }, { z10.b, z11.b } +// CHECK-ENCODING: [0x54,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e154 + +sunpk {z20.h - z23.h}, {z12.b - z13.b} // 11000001-01110101-11100001-10010100 +// CHECK-INST: sunpk { z20.h - z23.h }, { z12.b, z13.b } +// CHECK-ENCODING: [0x94,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e194 + +sunpk {z28.h - z31.h}, {z30.b - z31.b} // 11000001-01110101-11100011-11011100 +// CHECK-INST: sunpk { z28.h - z31.h }, { z30.b, z31.b } +// CHECK-ENCODING: [0xdc,0xe3,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e3dc + + +sunpk {z0.s - z3.s}, {z0.h - z1.h} // 11000001-10110101-11100000-00000000 +// CHECK-INST: sunpk { z0.s - z3.s }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xe0,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e000 + +sunpk {z20.s - z23.s}, {z10.h - z11.h} // 11000001-10110101-11100001-01010100 +// CHECK-INST: sunpk { z20.s - z23.s }, { z10.h, z11.h } +// CHECK-ENCODING: [0x54,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e154 + +sunpk {z20.s - z23.s}, {z12.h - z13.h} // 11000001-10110101-11100001-10010100 +// CHECK-INST: sunpk { z20.s - z23.s }, { z12.h, z13.h } +// CHECK-ENCODING: [0x94,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e194 + +sunpk {z28.s - z31.s}, {z30.h - z31.h} // 11000001-10110101-11100011-11011100 +// CHECK-INST: sunpk { z28.s - z31.s }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdc,0xe3,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e3dc + + +sunpk {z0.d - z3.d}, {z0.s - z1.s} // 11000001-11110101-11100000-00000000 +// CHECK-INST: sunpk { z0.d - z3.d }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xf5,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e000 + +sunpk {z20.d - z23.d}, {z10.s - z11.s} // 11000001-11110101-11100001-01010100 +// CHECK-INST: sunpk { z20.d - z23.d }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e154 + +sunpk {z20.d - z23.d}, {z12.s - z13.s} // 11000001-11110101-11100001-10010100 +// CHECK-INST: sunpk { z20.d - z23.d }, { z12.s, z13.s } +// CHECK-ENCODING: [0x94,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e194 + +sunpk {z28.d - z31.d}, {z30.s - z31.s} // 11000001-11110101-11100011-11011100 +// CHECK-INST: sunpk { z28.d - z31.d }, { z30.s, z31.s } +// CHECK-ENCODING: [0xdc,0xe3,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e3dc + diff --git a/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s b/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s new file mode 100644 index 0000000000000..05fdf348e5a96 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uunpk {z0.h-z2.h}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.h-z2.h}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z1.s-z2.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: uunpk {z1.s-z2.s}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: uunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +uunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/uunpk.s b/llvm/test/MC/AArch64/SME2/uunpk.s new file mode 100644 index 0000000000000..414dcec8928f2 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uunpk.s @@ -0,0 +1,163 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uunpk {z0.h - z1.h}, z0.b // 11000001-01100101-11100000-00000001 +// CHECK-INST: uunpk { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x01,0xe0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e001 + +uunpk {z20.h - z21.h}, z10.b // 11000001-01100101-11100001-01010101 +// CHECK-INST: uunpk { z20.h, z21.h }, z10.b +// CHECK-ENCODING: [0x55,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e155 + +uunpk {z22.h - z23.h}, z13.b // 11000001-01100101-11100001-10110111 +// 
CHECK-INST: uunpk { z22.h, z23.h }, z13.b +// CHECK-ENCODING: [0xb7,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e1b7 + +uunpk {z30.h - z31.h}, z31.b // 11000001-01100101-11100011-11111111 +// CHECK-INST: uunpk { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xff,0xe3,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e3ff + + +uunpk {z0.s - z1.s}, z0.h // 11000001-10100101-11100000-00000001 +// CHECK-INST: uunpk { z0.s, z1.s }, z0.h +// CHECK-ENCODING: [0x01,0xe0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e001 + +uunpk {z20.s - z21.s}, z10.h // 11000001-10100101-11100001-01010101 +// CHECK-INST: uunpk { z20.s, z21.s }, z10.h +// CHECK-ENCODING: [0x55,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e155 + +uunpk {z22.s - z23.s}, z13.h // 11000001-10100101-11100001-10110111 +// CHECK-INST: uunpk { z22.s, z23.s }, z13.h +// CHECK-ENCODING: [0xb7,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e1b7 + +uunpk {z30.s - z31.s}, z31.h // 11000001-10100101-11100011-11111111 +// CHECK-INST: uunpk { z30.s, z31.s }, z31.h +// CHECK-ENCODING: [0xff,0xe3,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e3ff + + +uunpk {z0.d - z1.d}, z0.s // 11000001-11100101-11100000-00000001 +// CHECK-INST: uunpk { z0.d, z1.d }, z0.s +// CHECK-ENCODING: [0x01,0xe0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e001 + +uunpk {z20.d - z21.d}, z10.s // 11000001-11100101-11100001-01010101 +// CHECK-INST: uunpk { z20.d, z21.d }, z10.s +// CHECK-ENCODING: [0x55,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e155 + +uunpk {z22.d - z23.d}, z13.s // 11000001-11100101-11100001-10110111 +// CHECK-INST: uunpk { z22.d, z23.d }, z13.s +// CHECK-ENCODING: [0xb7,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e1b7 + 
+uunpk {z30.d - z31.d}, z31.s // 11000001-11100101-11100011-11111111 +// CHECK-INST: uunpk { z30.d, z31.d }, z31.s +// CHECK-ENCODING: [0xff,0xe3,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e3ff + + +uunpk {z0.h - z3.h}, {z0.b - z1.b} // 11000001-01110101-11100000-00000001 +// CHECK-INST: uunpk { z0.h - z3.h }, { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0xe0,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e001 + +uunpk {z20.h - z23.h}, {z10.b - z11.b} // 11000001-01110101-11100001-01010101 +// CHECK-INST: uunpk { z20.h - z23.h }, { z10.b, z11.b } +// CHECK-ENCODING: [0x55,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e155 + +uunpk {z20.h - z23.h}, {z12.b - z13.b} // 11000001-01110101-11100001-10010101 +// CHECK-INST: uunpk { z20.h - z23.h }, { z12.b, z13.b } +// CHECK-ENCODING: [0x95,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e195 + +uunpk {z28.h - z31.h}, {z30.b - z31.b} // 11000001-01110101-11100011-11011101 +// CHECK-INST: uunpk { z28.h - z31.h }, { z30.b, z31.b } +// CHECK-ENCODING: [0xdd,0xe3,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e3dd + + +uunpk {z0.s - z3.s}, {z0.h - z1.h} // 11000001-10110101-11100000-00000001 +// CHECK-INST: uunpk { z0.s - z3.s }, { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0xe0,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e001 + +uunpk {z20.s - z23.s}, {z10.h - z11.h} // 11000001-10110101-11100001-01010101 +// CHECK-INST: uunpk { z20.s - z23.s }, { z10.h, z11.h } +// CHECK-ENCODING: [0x55,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e155 + +uunpk {z20.s - z23.s}, {z12.h - z13.h} // 11000001-10110101-11100001-10010101 +// CHECK-INST: uunpk { z20.s - z23.s }, { z12.h, z13.h } +// CHECK-ENCODING: [0x95,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e195 + +uunpk 
{z28.s - z31.s}, {z30.h - z31.h} // 11000001-10110101-11100011-11011101 +// CHECK-INST: uunpk { z28.s - z31.s }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdd,0xe3,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e3dd + + +uunpk {z0.d - z3.d}, {z0.s - z1.s} // 11000001-11110101-11100000-00000001 +// CHECK-INST: uunpk { z0.d - z3.d }, { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0xe0,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e001 + +uunpk {z20.d - z23.d}, {z10.s - z11.s} // 11000001-11110101-11100001-01010101 +// CHECK-INST: uunpk { z20.d - z23.d }, { z10.s, z11.s } +// CHECK-ENCODING: [0x55,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e155 + +uunpk {z20.d - z23.d}, {z12.s - z13.s} // 11000001-11110101-11100001-10010101 +// CHECK-INST: uunpk { z20.d - z23.d }, { z12.s, z13.s } +// CHECK-ENCODING: [0x95,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e195 + +uunpk {z28.d - z31.d}, {z30.s - z31.s} // 11000001-11110101-11100011-11011101 +// CHECK-INST: uunpk { z28.d - z31.d }, { z30.s, z31.s } +// CHECK-ENCODING: [0xdd,0xe3,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e3dd + diff --git a/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s b/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s new file mode 100644 index 0000000000000..aa853fe23194d --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s @@ -0,0 +1,25 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uzp {z0.q-z2.q}, z0.q, z0.q +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uzp {z0.q-z2.q}, z0.q, z0.q +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z21.h-z22.h}, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 
consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: uzp {z21.h-z22.h}, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z0.s-z4.s}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: uzp {z0.s-z4.s}, {z0.s-z3.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z20.b-z23.b}, {z9.b-z12.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: uzp {z20.b-z23.b}, {z9.b-z12.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SME2/uzp.s b/llvm/test/MC/AArch64/SME2/uzp.s new file mode 100644 index 0000000000000..4eb673e8dc700 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uzp.s @@ -0,0 +1,263 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uzp {z0.q - z1.q}, z0.q, z0.q // 11000001-00100000-11010100-00000001 +// CHECK-INST: uzp { z0.q, z1.q }, z0.q, z0.q +// CHECK-ENCODING: [0x01,0xd4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d401 + +uzp {z20.q - z21.q}, z10.q, z21.q // 
11000001-00110101-11010101-01010101 +// CHECK-INST: uzp { z20.q, z21.q }, z10.q, z21.q +// CHECK-ENCODING: [0x55,0xd5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d555 + +uzp {z22.q - z23.q}, z13.q, z8.q // 11000001-00101000-11010101-10110111 +// CHECK-INST: uzp { z22.q, z23.q }, z13.q, z8.q +// CHECK-ENCODING: [0xb7,0xd5,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d5b7 + +uzp {z30.q - z31.q}, z31.q, z31.q // 11000001-00111111-11010111-11111111 +// CHECK-INST: uzp { z30.q, z31.q }, z31.q, z31.q +// CHECK-ENCODING: [0xff,0xd7,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd7ff + + +uzp {z0.h - z1.h}, z0.h, z0.h // 11000001-01100000-11010000-00000001 +// CHECK-INST: uzp { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x01,0xd0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d001 + +uzp {z20.h - z21.h}, z10.h, z21.h // 11000001-01110101-11010001-01010101 +// CHECK-INST: uzp { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x55,0xd1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d155 + +uzp {z22.h - z23.h}, z13.h, z8.h // 11000001-01101000-11010001-10110111 +// CHECK-INST: uzp { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0xd1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d1b7 + +uzp {z30.h - z31.h}, z31.h, z31.h // 11000001-01111111-11010011-11111111 +// CHECK-INST: uzp { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xff,0xd3,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fd3ff + + +uzp {z0.s - z1.s}, z0.s, z0.s // 11000001-10100000-11010000-00000001 +// CHECK-INST: uzp { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x01,0xd0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0d001 + +uzp {z20.s - z21.s}, z10.s, z21.s // 11000001-10110101-11010001-01010101 +// CHECK-INST: uzp { z20.s, z21.s }, z10.s, z21.s 
+// CHECK-ENCODING: [0x55,0xd1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5d155 + +uzp {z22.s - z23.s}, z13.s, z8.s // 11000001-10101000-11010001-10110111 +// CHECK-INST: uzp { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb7,0xd1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8d1b7 + +uzp {z30.s - z31.s}, z31.s, z31.s // 11000001-10111111-11010011-11111111 +// CHECK-INST: uzp { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xff,0xd3,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfd3ff + + +uzp {z0.d - z1.d}, z0.d, z0.d // 11000001-11100000-11010000-00000001 +// CHECK-INST: uzp { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x01,0xd0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0d001 + +uzp {z20.d - z21.d}, z10.d, z21.d // 11000001-11110101-11010001-01010101 +// CHECK-INST: uzp { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x55,0xd1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d155 + +uzp {z22.d - z23.d}, z13.d, z8.d // 11000001-11101000-11010001-10110111 +// CHECK-INST: uzp { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb7,0xd1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d1b7 + +uzp {z30.d - z31.d}, z31.d, z31.d // 11000001-11111111-11010011-11111111 +// CHECK-INST: uzp { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xff,0xd3,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffd3ff + + +uzp {z0.b - z1.b}, z0.b, z0.b // 11000001-00100000-11010000-00000001 +// CHECK-INST: uzp { z0.b, z1.b }, z0.b, z0.b +// CHECK-ENCODING: [0x01,0xd0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d001 + +uzp {z20.b - z21.b}, z10.b, z21.b // 11000001-00110101-11010001-01010101 +// CHECK-INST: uzp { z20.b, z21.b }, z10.b, z21.b +// CHECK-ENCODING: [0x55,0xd1,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c135d155 + +uzp {z22.b - z23.b}, z13.b, z8.b // 11000001-00101000-11010001-10110111 +// CHECK-INST: uzp { z22.b, z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb7,0xd1,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d1b7 + +uzp {z30.b - z31.b}, z31.b, z31.b // 11000001-00111111-11010011-11111111 +// CHECK-INST: uzp { z30.b, z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xff,0xd3,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd3ff + + +uzp {z0.q - z3.q}, {z0.q - z3.q} // 11000001-00110111-11100000-00000010 +// CHECK-INST: uzp { z0.q - z3.q }, { z0.q - z3.q } +// CHECK-ENCODING: [0x02,0xe0,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e002 + +uzp {z20.q - z23.q}, {z8.q - z11.q} // 11000001-00110111-11100001-00010110 +// CHECK-INST: uzp { z20.q - z23.q }, { z8.q - z11.q } +// CHECK-ENCODING: [0x16,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e116 + +uzp {z20.q - z23.q}, {z12.q - z15.q} // 11000001-00110111-11100001-10010110 +// CHECK-INST: uzp { z20.q - z23.q }, { z12.q - z15.q } +// CHECK-ENCODING: [0x96,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e196 + +uzp {z28.q - z31.q}, {z28.q - z31.q} // 11000001-00110111-11100011-10011110 +// CHECK-INST: uzp { z28.q - z31.q }, { z28.q - z31.q } +// CHECK-ENCODING: [0x9e,0xe3,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e39e + + +uzp {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01110110-11100000-00000010 +// CHECK-INST: uzp { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x02,0xe0,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e002 + +uzp {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01110110-11100001-00010110 +// CHECK-INST: uzp { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x16,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e116 + +uzp 
{z20.h - z23.h}, {z12.h - z15.h} // 11000001-01110110-11100001-10010110 +// CHECK-INST: uzp { z20.h - z23.h }, { z12.h - z15.h } +// CHECK-ENCODING: [0x96,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e196 + +uzp {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01110110-11100011-10011110 +// CHECK-INST: uzp { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9e,0xe3,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e39e + + +uzp {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10110110-11100000-00000010 +// CHECK-INST: uzp { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x02,0xe0,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e002 + +uzp {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10110110-11100001-00010110 +// CHECK-INST: uzp { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x16,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e116 + +uzp {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10110110-11100001-10010110 +// CHECK-INST: uzp { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x96,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e196 + +uzp {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10110110-11100011-10011110 +// CHECK-INST: uzp { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9e,0xe3,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e39e + + +uzp {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11110110-11100000-00000010 +// CHECK-INST: uzp { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x02,0xe0,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e002 + +uzp {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11110110-11100001-00010110 +// CHECK-INST: uzp { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x16,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e116 + +uzp {z20.d - 
z23.d}, {z12.d - z15.d} // 11000001-11110110-11100001-10010110 +// CHECK-INST: uzp { z20.d - z23.d }, { z12.d - z15.d } +// CHECK-ENCODING: [0x96,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e196 + +uzp {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11110110-11100011-10011110 +// CHECK-INST: uzp { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9e,0xe3,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e39e + + +uzp {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00110110-11100000-00000010 +// CHECK-INST: uzp { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x02,0xe0,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e002 + +uzp {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00110110-11100001-00010110 +// CHECK-INST: uzp { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x16,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e116 + +uzp {z20.b - z23.b}, {z12.b - z15.b} // 11000001-00110110-11100001-10010110 +// CHECK-INST: uzp { z20.b - z23.b }, { z12.b - z15.b } +// CHECK-ENCODING: [0x96,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e196 + +uzp {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00110110-11100011-10011110 +// CHECK-INST: uzp { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x9e,0xe3,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e39e + diff --git a/llvm/test/MC/AArch64/SME2/zero.s b/llvm/test/MC/AArch64/SME2/zero.s new file mode 100644 index 0000000000000..511aff8d57e8c --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zero.s @@ -0,0 +1,20 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s 
\ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +zero {zt0} // 11000000-01001000-00000000-00000001 +// CHECK-INST: zero { zt0 } +// CHECK-ENCODING: [0x01,0x00,0x48,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0480001 + diff --git a/llvm/test/MC/AArch64/SME2/zip-diagnostics.s b/llvm/test/MC/AArch64/SME2/zip-diagnostics.s new file mode 100644 index 0000000000000..6c80096ab4865 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zip-diagnostics.s @@ -0,0 +1,25 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +zip {z0.q-z2.q}, z0.q, z0.q +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: zip {z0.q-z2.q}, z0.q, z0.q +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z21.h-z22.h}, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: zip {z21.h-z22.h}, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z0.s-z4.s}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: zip {z0.s-z4.s}, {z0.s-z3.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z20.b-z23.b}, {z9.b-z12.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a 
multiple of 4 and with matching element types +// CHECK-NEXT: zip {z20.b-z23.b}, {z9.b-z12.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SME2/zip.s b/llvm/test/MC/AArch64/SME2/zip.s new file mode 100644 index 0000000000000..cfb048710ca43 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zip.s @@ -0,0 +1,263 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +zip {z0.q - z1.q}, z0.q, z0.q // 11000001-00100000-11010100-00000000 +// CHECK-INST: zip { z0.q, z1.q }, z0.q, z0.q +// CHECK-ENCODING: [0x00,0xd4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d400 + +zip {z20.q - z21.q}, z10.q, z21.q // 11000001-00110101-11010101-01010100 +// CHECK-INST: zip { z20.q, z21.q }, z10.q, z21.q +// CHECK-ENCODING: [0x54,0xd5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d554 + +zip {z22.q - z23.q}, z13.q, z8.q // 11000001-00101000-11010101-10110110 +// CHECK-INST: zip { z22.q, z23.q }, z13.q, z8.q +// CHECK-ENCODING: [0xb6,0xd5,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d5b6 + +zip {z30.q - z31.q}, z31.q, z31.q // 11000001-00111111-11010111-11111110 +// CHECK-INST: zip { z30.q, z31.q }, z31.q, 
z31.q +// CHECK-ENCODING: [0xfe,0xd7,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd7fe + + +zip {z0.h - z1.h}, z0.h, z0.h // 11000001-01100000-11010000-00000000 +// CHECK-INST: zip { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x00,0xd0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d000 + +zip {z20.h - z21.h}, z10.h, z21.h // 11000001-01110101-11010001-01010100 +// CHECK-INST: zip { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x54,0xd1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d154 + +zip {z22.h - z23.h}, z13.h, z8.h // 11000001-01101000-11010001-10110110 +// CHECK-INST: zip { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb6,0xd1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d1b6 + +zip {z30.h - z31.h}, z31.h, z31.h // 11000001-01111111-11010011-11111110 +// CHECK-INST: zip { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xfe,0xd3,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fd3fe + + +zip {z0.s - z1.s}, z0.s, z0.s // 11000001-10100000-11010000-00000000 +// CHECK-INST: zip { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x00,0xd0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0d000 + +zip {z20.s - z21.s}, z10.s, z21.s // 11000001-10110101-11010001-01010100 +// CHECK-INST: zip { z20.s, z21.s }, z10.s, z21.s +// CHECK-ENCODING: [0x54,0xd1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5d154 + +zip {z22.s - z23.s}, z13.s, z8.s // 11000001-10101000-11010001-10110110 +// CHECK-INST: zip { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb6,0xd1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8d1b6 + +zip {z30.s - z31.s}, z31.s, z31.s // 11000001-10111111-11010011-11111110 +// CHECK-INST: zip { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xfe,0xd3,0xbf,0xc1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: c1bfd3fe + + +zip {z0.d - z1.d}, z0.d, z0.d // 11000001-11100000-11010000-00000000 +// CHECK-INST: zip { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xd0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0d000 + +zip {z20.d - z21.d}, z10.d, z21.d // 11000001-11110101-11010001-01010100 +// CHECK-INST: zip { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x54,0xd1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d154 + +zip {z22.d - z23.d}, z13.d, z8.d // 11000001-11101000-11010001-10110110 +// CHECK-INST: zip { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb6,0xd1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d1b6 + +zip {z30.d - z31.d}, z31.d, z31.d // 11000001-11111111-11010011-11111110 +// CHECK-INST: zip { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xfe,0xd3,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffd3fe + + +zip {z0.b - z1.b}, z0.b, z0.b // 11000001-00100000-11010000-00000000 +// CHECK-INST: zip { z0.b, z1.b }, z0.b, z0.b +// CHECK-ENCODING: [0x00,0xd0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d000 + +zip {z20.b, z21.b}, z10.b, z21.b // 11000001-00110101-11010001-01010100 +// CHECK-INST: zip { z20.b, z21.b }, z10.b, z21.b +// CHECK-ENCODING: [0x54,0xd1,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d154 + +zip {z22.b - z23.b}, z13.b, z8.b // 11000001-00101000-11010001-10110110 +// CHECK-INST: zip { z22.b, z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb6,0xd1,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d1b6 + +zip {z30.b - z31.b}, z31.b, z31.b // 11000001-00111111-11010011-11111110 +// CHECK-INST: zip { z30.b, z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xfe,0xd3,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd3fe + + +zip {z0.q - z3.q}, {z0.q - z3.q} // 
11000001-00110111-11100000-00000000 +// CHECK-INST: zip { z0.q - z3.q }, { z0.q - z3.q } +// CHECK-ENCODING: [0x00,0xe0,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e000 + +zip {z20.q - z23.q}, {z8.q - z11.q} // 11000001-00110111-11100001-00010100 +// CHECK-INST: zip { z20.q - z23.q }, { z8.q - z11.q } +// CHECK-ENCODING: [0x14,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e114 + +zip {z20.q - z23.q}, {z12.q - z15.q} // 11000001-00110111-11100001-10010100 +// CHECK-INST: zip { z20.q - z23.q }, { z12.q - z15.q } +// CHECK-ENCODING: [0x94,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e194 + +zip {z28.q - z31.q}, {z28.q - z31.q} // 11000001-00110111-11100011-10011100 +// CHECK-INST: zip { z28.q - z31.q }, { z28.q - z31.q } +// CHECK-ENCODING: [0x9c,0xe3,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e39c + + +zip {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01110110-11100000-00000000 +// CHECK-INST: zip { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0xe0,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e000 + +zip {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01110110-11100001-00010100 +// CHECK-INST: zip { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x14,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e114 + +zip {z20.h - z23.h}, {z12.h - z15.h} // 11000001-01110110-11100001-10010100 +// CHECK-INST: zip { z20.h - z23.h }, { z12.h - z15.h } +// CHECK-ENCODING: [0x94,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e194 + +zip {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01110110-11100011-10011100 +// CHECK-INST: zip { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9c,0xe3,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e39c + + +zip {z0.s - z3.s}, {z0.s - z3.s} // 
11000001-10110110-11100000-00000000 +// CHECK-INST: zip { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e000 + +zip {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10110110-11100001-00010100 +// CHECK-INST: zip { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e114 + +zip {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10110110-11100001-10010100 +// CHECK-INST: zip { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e194 + +zip {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10110110-11100011-10011100 +// CHECK-INST: zip { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e39c + + +zip {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11110110-11100000-00000000 +// CHECK-INST: zip { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0xe0,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e000 + +zip {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11110110-11100001-00010100 +// CHECK-INST: zip { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x14,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e114 + +zip {z20.d - z23.d}, {z12.d - z15.d} // 11000001-11110110-11100001-10010100 +// CHECK-INST: zip { z20.d - z23.d }, { z12.d - z15.d } +// CHECK-ENCODING: [0x94,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e194 + +zip {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11110110-11100011-10011100 +// CHECK-INST: zip { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9c,0xe3,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e39c + + +zip {z0.b - z3.b}, {z0.b - z3.b} // 
11000001-00110110-11100000-00000000 +// CHECK-INST: zip { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0xe0,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e000 + +zip {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00110110-11100001-00010100 +// CHECK-INST: zip { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x14,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e114 + +zip {z20.b - z23.b}, {z12.b - z15.b} // 11000001-00110110-11100001-10010100 +// CHECK-INST: zip { z20.b - z23.b }, { z12.b - z15.b } +// CHECK-ENCODING: [0x94,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e194 + +zip {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00110110-11100011-10011100 +// CHECK-INST: zip { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x9c,0xe3,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e39c + diff --git a/llvm/test/MC/AArch64/SME2p1/directive-arch-negative.s b/llvm/test/MC/AArch64/SME2p1/directive-arch-negative.s new file mode 100644 index 0000000000000..948d8f996c156 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p1/directive-arch-negative.s @@ -0,0 +1,7 @@ +// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s + +.arch armv9-a+sme2p1 +.arch armv9-a+nosme2p1 +sqcvt z0.h, {z0.s, z1.s} +// CHECK: error: instruction requires: sme2 +// CHECK: sqcvt z0.h, {z0.s, z1.s} diff --git a/llvm/test/MC/AArch64/SME2p1/directive-arch.s b/llvm/test/MC/AArch64/SME2p1/directive-arch.s new file mode 100644 index 0000000000000..112de2530ca8b --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p1/directive-arch.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s + +// SME2p1 should imply SME2 +.arch armv9-a+sme2p1 +sqcvt z0.h, {z0.s, z1.s} +// CHECK: sqcvt z0.h, { z0.s, z1.s } + +.arch armv9-a+nosme2p1 diff --git a/llvm/test/MC/AArch64/SME2p1/directive-arch_extension-negative.s 
b/llvm/test/MC/AArch64/SME2p1/directive-arch_extension-negative.s new file mode 100644 index 0000000000000..19f11b88173ff --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p1/directive-arch_extension-negative.s @@ -0,0 +1,7 @@ +// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s + +.arch_extension sme2p1 +.arch_extension nosme2 +sqcvt z0.h, { z0.s, z1.s } +// CHECK: error: instruction requires: sme2 +// CHECK: sqcvt z0.h diff --git a/llvm/test/MC/AArch64/SME2p1/directive-arch_extension.s b/llvm/test/MC/AArch64/SME2p1/directive-arch_extension.s new file mode 100644 index 0000000000000..653956d733450 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p1/directive-arch_extension.s @@ -0,0 +1,5 @@ +// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s + +.arch_extension sme2p1 +sqcvt z0.h, { z0.s, z1.s } +// CHECK: sqcvt z0.h, { z0.s, z1.s } diff --git a/llvm/test/MC/AArch64/SVE2p1/bfadd-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfadd-diagnostics.s new file mode 100644 index 0000000000000..1ead9d28277aa --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfadd-diagnostics.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfadd z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfadd z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfadd z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfadd z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfadd z23.h, p1/m, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width 
+// CHECK-NEXT: bfadd z23.h, p1/m, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfadd z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfadd z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfadd z23.h, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: bfadd z23.h, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfadd.s b/llvm/test/MC/AArch64/SVE2p1/bfadd.s new file mode 100644 index 0000000000000..1021df12fc050 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfadd.s @@ -0,0 +1,76 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +movprfx z23.h, p3/m, z31.h +bfadd z23.h, p3/m, z23.h, z13.h // 01100101-00000000-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfadd z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x00,0x65] +// 
CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65008db7 + +movprfx z23, z31 +bfadd z23.h, p3/m, z23.h, z13.h // 01100101-00000000-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfadd z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65008db7 + +bfadd z0.h, p0/m, z0.h, z0.h // 01100101-00000000-10000000-00000000 +// CHECK-INST: bfadd z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65008000 + +bfadd z21.h, p5/m, z21.h, z10.h // 01100101-00000000-10010101-01010101 +// CHECK-INST: bfadd z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65009555 + +bfadd z23.h, p3/m, z23.h, z13.h // 01100101-00000000-10001101-10110111 +// CHECK-INST: bfadd z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65008db7 + +bfadd z31.h, p7/m, z31.h, z31.h // 01100101-00000000-10011111-11111111 +// CHECK-INST: bfadd z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65009fff + +bfadd z0.h, z0.h, z0.h // 01100101-00000000-00000000-00000000 +// CHECK-INST: bfadd z0.h, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x00,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65000000 + +bfadd z21.h, z10.h, z21.h // 01100101-00010101-00000001-01010101 +// CHECK-INST: bfadd z21.h, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x01,0x15,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65150155 + +bfadd z23.h, z13.h, z8.h // 01100101-00001000-00000001-10110111 +// CHECK-INST: bfadd z23.h, z13.h, z8.h +// 
CHECK-ENCODING: [0xb7,0x01,0x08,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 650801b7 + +bfadd z31.h, z31.h, z31.h // 01100101-00011111-00000011-11111111 +// CHECK-INST: bfadd z31.h, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x03,0x1f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 651f03ff diff --git a/llvm/test/MC/AArch64/SVE2p1/bfclamp-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfclamp-diagnostics.s new file mode 100644 index 0000000000000..b18108fcdf08e --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfclamp-diagnostics.s @@ -0,0 +1,14 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfclamp z23.h, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfclamp z23.h, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfclamp z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfclamp z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfclamp.s b/llvm/test/MC/AArch64/SVE2p1/bfclamp.s new file mode 100644 index 0000000000000..d7b85edb1730e --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfclamp.s @@ -0,0 +1,46 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | 
FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23, z31 +bfclamp z23.h, z13.h, z8.h // 01100100-00101000-00100101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfclamp z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x25,0x28,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 642825b7 + +bfclamp z0.h, z0.h, z0.h // 01100100-00100000-00100100-00000000 +// CHECK-INST: bfclamp z0.h, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x24,0x20,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64202400 + +bfclamp z21.h, z10.h, z21.h // 01100100-00110101-00100101-01010101 +// CHECK-INST: bfclamp z21.h, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x25,0x35,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64352555 + +bfclamp z23.h, z13.h, z8.h // 01100100-00101000-00100101-10110111 +// CHECK-INST: bfclamp z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x25,0x28,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 642825b7 + +bfclamp z31.h, z31.h, z31.h // 01100100-00111111-00100111-11111111 +// CHECK-INST: bfclamp z31.h, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x27,0x3f,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 643f27ff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmax-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmax-diagnostics.s new file mode 100644 index 0000000000000..f7e30713e7d52 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmax-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// 
--------------------------------------------------------------------------// +// Invalid predicate register + +bfmax z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfmax z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmax z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmax z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmax z23.h, p1/z, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmax z23.h, p1/z, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmax z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmax z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmax.s b/llvm/test/MC/AArch64/SVE2p1/bfmax.s new file mode 100644 index 0000000000000..cd67abc498f3b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmax.s @@ -0,0 +1,53 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: 
| llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23.h, p3/m, z31.h +bfmax z23.h, p3/m, z23.h, z13.h // 01100101-00000110-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmax z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65068db7 + +movprfx z23, z31 +bfmax z23.h, p3/m, z23.h, z13.h // 01100101-00000110-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmax z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65068db7 + +bfmax z0.h, p0/m, z0.h, z0.h // 01100101-00000110-10000000-00000000 +// CHECK-INST: bfmax z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65068000 + +bfmax z21.h, p5/m, z21.h, z10.h // 01100101-00000110-10010101-01010101 +// CHECK-INST: bfmax z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65069555 + +bfmax z23.h, p3/m, z23.h, z13.h // 01100101-00000110-10001101-10110111 +// CHECK-INST: bfmax z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65068db7 + +bfmax z31.h, p7/m, z31.h, z31.h // 01100101-00000110-10011111-11111111 +// CHECK-INST: bfmax z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x06,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65069fff diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmaxnm-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm-diagnostics.s new file mode 100644 index 0000000000000..220b66b435ed4 --- /dev/null +++ 
b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfmaxnm z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfmaxnm z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmaxnm z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmaxnm z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmaxnm z23.h, p1/z, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmaxnm z23.h, p1/z, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmaxnm z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmaxnm z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s new file mode 100644 index 0000000000000..83669ebc42b1f --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s @@ -0,0 +1,54 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d 
--mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23.h, p3/m, z31.h +bfmaxnm z23.h, p3/m, z23.h, z13.h // 01100101-00000100-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65048db7 + +movprfx z23, z31 +bfmaxnm z23.h, p3/m, z23.h, z13.h // 01100101-00000100-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65048db7 + +bfmaxnm z0.h, p0/m, z0.h, z0.h // 01100101-00000100-10000000-00000000 +// CHECK-INST: bfmaxnm z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65048000 + +bfmaxnm z21.h, p5/m, z21.h, z10.h // 01100101-00000100-10010101-01010101 +// CHECK-INST: bfmaxnm z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65049555 + +bfmaxnm z23.h, p3/m, z23.h, z13.h // 01100101-00000100-10001101-10110111 +// CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65048db7 + +bfmaxnm z31.h, p7/m, z31.h, z31.h // 01100101-00000100-10011111-11111111 +// CHECK-INST: bfmaxnm z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x04,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// 
CHECK-UNKNOWN: 65049fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmin-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmin-diagnostics.s new file mode 100644 index 0000000000000..a7f8be225fac9 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmin-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfmin z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfmin z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmin z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmin z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmin z23.h, p1/z, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmin z23.h, p1/z, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmin z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmin z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmin.s b/llvm/test/MC/AArch64/SVE2p1/bfmin.s new file mode 100644 index 0000000000000..1bb3a0e6f1f26 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmin.s @@ -0,0 +1,54 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d 
--no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23.h, p3/m, z31.h +bfmin z23.h, p3/m, z23.h, z13.h // 01100101-00000111-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmin z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65078db7 + +movprfx z23, z31 +bfmin z23.h, p3/m, z23.h, z13.h // 01100101-00000111-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmin z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65078db7 + +bfmin z0.h, p0/m, z0.h, z0.h // 01100101-00000111-10000000-00000000 +// CHECK-INST: bfmin z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65078000 + +bfmin z21.h, p5/m, z21.h, z10.h // 01100101-00000111-10010101-01010101 +// CHECK-INST: bfmin z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65079555 + +bfmin z23.h, p3/m, z23.h, z13.h // 01100101-00000111-10001101-10110111 +// CHECK-INST: bfmin z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65078db7 + +bfmin z31.h, p7/m, z31.h, z31.h // 
01100101-00000111-10011111-11111111 +// CHECK-INST: bfmin z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x07,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65079fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfminnm-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfminnm-diagnostics.s new file mode 100644 index 0000000000000..68c4211afa627 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfminnm-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfminnm z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfminnm z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfminnm z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfminnm z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfminnm z23.h, p1/z, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfminnm z23.h, p1/z, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfminnm z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfminnm z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfminnm.s b/llvm/test/MC/AArch64/SVE2p1/bfminnm.s new file mode 100644 index 0000000000000..9f444c7ac26ae --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfminnm.s @@ -0,0 +1,54 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// 
RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23.h, p3/m, z31.h +bfminnm z23.h, p3/m, z23.h, z13.h // 01100101-00000101-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfminnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65058db7 + +movprfx z23, z31 +bfminnm z23.h, p3/m, z23.h, z13.h // 01100101-00000101-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfminnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65058db7 + +bfminnm z0.h, p0/m, z0.h, z0.h // 01100101-00000101-10000000-00000000 +// CHECK-INST: bfminnm z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65058000 + +bfminnm z21.h, p5/m, z21.h, z10.h // 01100101-00000101-10010101-01010101 +// CHECK-INST: bfminnm z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65059555 + +bfminnm z23.h, p3/m, z23.h, z13.h // 01100101-00000101-10001101-10110111 +// CHECK-INST: 
bfminnm z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65058db7 + +bfminnm z31.h, p7/m, z31.h, z31.h // 01100101-00000101-10011111-11111111 +// CHECK-INST: bfminnm z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x05,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65059fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmla-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmla-diagnostics.s new file mode 100644 index 0000000000000..035f2898e2b92 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmla-diagnostics.s @@ -0,0 +1,41 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +bfmla z0.h, z0.h, z0.h[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: bfmla z0.h, z0.h, z0.h[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmla z0.h, z0.h, z0.h[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. 
+// CHECK-NEXT: bfmla z0.h, z0.h, z0.h[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmla z0.h, z0.h, z8.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h +// CHECK-NEXT: bfmla z0.h, z0.h, z8.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmla z0.h, z0.s, z0.s[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmla z0.h, z0.s, z0.s[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmla z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmla z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfmla z23.h, z12.h, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: bfmla z23.h, z12.h, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmla.s b/llvm/test/MC/AArch64/SVE2p1/bfmla.s new file mode 100644 index 0000000000000..ff257830a13da --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmla.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23, z31 +bfmla z23.h, z13.h, z0.h[5] // 01100100-01101000-00001001-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmla z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x09,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 646809b7 + +bfmla z0.h, z0.h, z0.h[0] // 01100100-00100000-00001000-00000000 +// CHECK-INST: bfmla z0.h, z0.h, z0.h[0] +// CHECK-ENCODING: [0x00,0x08,0x20,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64200800 + +bfmla z21.h, z10.h, z5.h[6] // 01100100-01110101-00001001-01010101 +// CHECK-INST: bfmla z21.h, z10.h, z5.h[6] +// CHECK-ENCODING: [0x55,0x09,0x75,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64750955 + +bfmla z23.h, z13.h, z0.h[5] // 01100100-01101000-00001001-10110111 +// CHECK-INST: bfmla z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x09,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 646809b7 + +bfmla z31.h, z31.h, z7.h[7] // 01100100-01111111-00001011-11111111 +// CHECK-INST: bfmla z31.h, z31.h, z7.h[7] +// CHECK-ENCODING: [0xff,0x0b,0x7f,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 647f0bff + + +movprfx z23.h, p3/m, z31.h +bfmla z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmla z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x0d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65280db7 + +movprfx z23, z31 +bfmla z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00001101-10110111 +// CHECK-INST: movprfx z23, 
z31 +// CHECK-INST: bfmla z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x0d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65280db7 + +bfmla z0.h, p0/m, z0.h, z0.h // 01100101-00100000-00000000-00000000 +// CHECK-INST: bfmla z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x00,0x20,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65200000 + +bfmla z21.h, p5/m, z10.h, z21.h // 01100101-00110101-00010101-01010101 +// CHECK-INST: bfmla z21.h, p5/m, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x15,0x35,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65351555 + +bfmla z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00001101-10110111 +// CHECK-INST: bfmla z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x0d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65280db7 + +bfmla z31.h, p7/m, z31.h, z31.h // 01100101-00111111-00011111-11111111 +// CHECK-INST: bfmla z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x1f,0x3f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 653f1fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmls-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmls-diagnostics.s new file mode 100644 index 0000000000000..cbc7efe9df7aa --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmls-diagnostics.s @@ -0,0 +1,41 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +bfmls z0.h, z0.h, z0.h[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: bfmls z0.h, z0.h, z0.h[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmls z0.h, z0.h, z0.h[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. 
+// CHECK-NEXT: bfmls z0.h, z0.h, z0.h[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmls z0.h, z0.h, z8.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z7.h +// CHECK-NEXT: bfmls z0.h, z0.h, z8.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmls z0.h, z0.s, z0.s[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmls z0.h, z0.s, z0.s[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmls z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmls z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfmls z23.h, z12.h, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx, suggest using unpredicated movprfx +// CHECK-NEXT: bfmls z23.h, z12.h, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmls.s b/llvm/test/MC/AArch64/SVE2p1/bfmls.s new file mode 100644 index 0000000000000..c153b56b9586b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmls.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23, z31 +bfmls z23.h, z13.h, z0.h[5] // 01100100-01101000-00001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmls z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x0d,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64680db7 + +bfmls z0.h, z0.h, z0.h[0] // 01100100-00100000-00001100-00000000 +// CHECK-INST: bfmls z0.h, z0.h, z0.h[0] +// CHECK-ENCODING: [0x00,0x0c,0x20,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64200c00 + +bfmls z21.h, z10.h, z5.h[6] // 01100100-01110101-00001101-01010101 +// CHECK-INST: bfmls z21.h, z10.h, z5.h[6] +// CHECK-ENCODING: [0x55,0x0d,0x75,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64750d55 + +bfmls z23.h, z13.h, z0.h[5] // 01100100-01101000-00001101-10110111 +// CHECK-INST: bfmls z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x0d,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64680db7 + +bfmls z31.h, z31.h, z7.h[7] // 01100100-01111111-00001111-11111111 +// CHECK-INST: bfmls z31.h, z31.h, z7.h[7] +// CHECK-ENCODING: [0xff,0x0f,0x7f,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 647f0fff + + +movprfx z23.h, p3/m, z31.h +bfmls z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00101101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmls z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x2d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65282db7 + +movprfx z23, z31 +bfmls z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00101101-10110111 +// CHECK-INST: movprfx z23, 
z31 +// CHECK-INST: bfmls z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x2d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65282db7 + +bfmls z0.h, p0/m, z0.h, z0.h // 01100101-00100000-00100000-00000000 +// CHECK-INST: bfmls z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x20,0x20,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65202000 + +bfmls z21.h, p5/m, z10.h, z21.h // 01100101-00110101-00110101-01010101 +// CHECK-INST: bfmls z21.h, p5/m, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x35,0x35,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65353555 + +bfmls z23.h, p3/m, z13.h, z8.h // 01100101-00101000-00101101-10110111 +// CHECK-INST: bfmls z23.h, p3/m, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x2d,0x28,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65282db7 + +bfmls z31.h, p7/m, z31.h, z31.h // 01100101-00111111-00111111-11111111 +// CHECK-INST: bfmls z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x3f,0x3f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 653f3fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmlslb.s b/llvm/test/MC/AArch64/SVE2p1/bfmlslb.s index b1109838b88b4..127b54cb240e4 100644 --- a/llvm/test/MC/AArch64/SVE2p1/bfmlslb.s +++ b/llvm/test/MC/AArch64/SVE2p1/bfmlslb.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble 
-show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmlslt.s b/llvm/test/MC/AArch64/SVE2p1/bfmlslt.s index bbb8b15964797..28fa3759771bb 100644 --- a/llvm/test/MC/AArch64/SVE2p1/bfmlslt.s +++ b/llvm/test/MC/AArch64/SVE2p1/bfmlslt.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmul-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfmul-diagnostics.s new file mode 100644 index 0000000000000..51adae0689603 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmul-diagnostics.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfmul z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfmul z23.h, p8/m, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmul z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfmul z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfmul z23.h, p1/m, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmul z23.h, p1/m, z23.s, z13.s +// 
CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfmul z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfmul z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfmul z23.h, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: bfmul z23.h, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmul.s b/llvm/test/MC/AArch64/SVE2p1/bfmul.s new file mode 100644 index 0000000000000..e0b93bcbb1035 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfmul.s @@ -0,0 +1,101 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +bfmul z0.h, z0.h, z0.h[0] // 01100100-00100000-00101000-00000000 +// CHECK-INST: bfmul z0.h, z0.h, z0.h[0] +// CHECK-ENCODING: [0x00,0x28,0x20,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64202800 + +bfmul z21.h, z10.h, z5.h[6] // 
01100100-01110101-00101001-01010101 +// CHECK-INST: bfmul z21.h, z10.h, z5.h[6] +// CHECK-ENCODING: [0x55,0x29,0x75,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 64752955 + +bfmul z23.h, z13.h, z0.h[5] // 01100100-01101000-00101001-10110111 +// CHECK-INST: bfmul z23.h, z13.h, z0.h[5] +// CHECK-ENCODING: [0xb7,0x29,0x68,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 646829b7 + +bfmul z31.h, z31.h, z7.h[7] // 01100100-01111111-00101011-11111111 +// CHECK-INST: bfmul z31.h, z31.h, z7.h[7] +// CHECK-ENCODING: [0xff,0x2b,0x7f,0x64] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 647f2bff + +movprfx z23.h, p3/m, z31.h +bfmul z23.h, p3/m, z23.h, z13.h // 01100101-00000010-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfmul z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65028db7 + +movprfx z23, z31 +bfmul z23.h, p3/m, z23.h, z13.h // 01100101-00000010-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfmul z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65028db7 + +bfmul z0.h, p0/m, z0.h, z0.h // 01100101-00000010-10000000-00000000 +// CHECK-INST: bfmul z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65028000 + +bfmul z21.h, p5/m, z21.h, z10.h // 01100101-00000010-10010101-01010101 +// CHECK-INST: bfmul z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65029555 + +bfmul z23.h, p3/m, z23.h, z13.h // 01100101-00000010-10001101-10110111 +// CHECK-INST: bfmul z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: 
[0xb7,0x8d,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65028db7 + +bfmul z31.h, p7/m, z31.h, z31.h // 01100101-00000010-10011111-11111111 +// CHECK-INST: bfmul z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x02,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65029fff + +bfmul z0.h, z0.h, z0.h // 01100101-00000000-00001000-00000000 +// CHECK-INST: bfmul z0.h, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x08,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65000800 + +bfmul z21.h, z10.h, z21.h // 01100101-00010101-00001001-01010101 +// CHECK-INST: bfmul z21.h, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x09,0x15,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65150955 + +bfmul z23.h, z13.h, z8.h // 01100101-00001000-00001001-10110111 +// CHECK-INST: bfmul z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x09,0x08,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 650809b7 + +bfmul z31.h, z31.h, z31.h // 01100101-00011111-00001011-11111111 +// CHECK-INST: bfmul z31.h, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x0b,0x1f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 651f0bff + diff --git a/llvm/test/MC/AArch64/SVE2p1/bfsub-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/bfsub-diagnostics.s new file mode 100644 index 0000000000000..86cb32075f501 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfsub-diagnostics.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +bfsub z23.h, p8/m, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfsub z23.h, p8/m, z23.h, 
z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfsub z23.h, p1/z, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfsub z23.h, p1/z, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +bfsub z23.h, p1/m, z23.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfsub z23.h, p1/m, z23.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfsub z23.s, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfsub z23.s, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of movprfx + +movprfx z23.h, p1/m, z31.h +bfsub z23.h, z23.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: bfsub z23.h, z23.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/bfsub.s b/llvm/test/MC/AArch64/SVE2p1/bfsub.s new file mode 100644 index 0000000000000..42cb6772c3a51 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/bfsub.s @@ -0,0 +1,76 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \ 
+// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +movprfx z23.h, p3/m, z31.h +bfsub z23.h, p3/m, z23.h, z13.h // 01100101-00000001-10001101-10110111 +// CHECK-INST: movprfx z23.h, p3/m, z31.h +// CHECK-INST: bfsub z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65018db7 + +movprfx z23, z31 +bfsub z23.h, p3/m, z23.h, z13.h // 01100101-00000001-10001101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: bfsub z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65018db7 + +bfsub z0.h, p0/m, z0.h, z0.h // 01100101-00000001-10000000-00000000 +// CHECK-INST: bfsub z0.h, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x00,0x80,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65018000 + +bfsub z21.h, p5/m, z21.h, z10.h // 01100101-00000001-10010101-01010101 +// CHECK-INST: bfsub z21.h, p5/m, z21.h, z10.h +// CHECK-ENCODING: [0x55,0x95,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65019555 + +bfsub z23.h, p3/m, z23.h, z13.h // 01100101-00000001-10001101-10110111 +// CHECK-INST: bfsub z23.h, p3/m, z23.h, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65018db7 + +bfsub z31.h, p7/m, z31.h, z31.h // 01100101-00000001-10011111-11111111 +// CHECK-INST: bfsub z31.h, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x01,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65019fff + +bfsub z0.h, z0.h, z0.h // 01100101-00000000-00000100-00000000 +// CHECK-INST: bfsub z0.h, z0.h, z0.h +// CHECK-ENCODING: 
[0x00,0x04,0x00,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65000400 + +bfsub z21.h, z10.h, z21.h // 01100101-00010101-00000101-01010101 +// CHECK-INST: bfsub z21.h, z10.h, z21.h +// CHECK-ENCODING: [0x55,0x05,0x15,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 65150555 + +bfsub z23.h, z13.h, z8.h // 01100101-00001000-00000101-10110111 +// CHECK-INST: bfsub z23.h, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0x05,0x08,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 650805b7 + +bfsub z31.h, z31.h, z31.h // 01100101-00011111-00000111-11111111 +// CHECK-INST: bfsub z31.h, z31.h, z31.h +// CHECK-ENCODING: [0xff,0x07,0x1f,0x65] +// CHECK-ERROR: instruction requires: b16b16 sme2p1 or sve2p1 +// CHECK-UNKNOWN: 651f07ff diff --git a/llvm/test/MC/AArch64/SVE2p1/cntp.s b/llvm/test/MC/AArch64/SVE2p1/cntp.s index 817c5f05dddb0..6f6c6a0bc612a 100644 --- a/llvm/test/MC/AArch64/SVE2p1/cntp.s +++ b/llvm/test/MC/AArch64/SVE2p1/cntp.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/fclamp.s b/llvm/test/MC/AArch64/SVE2p1/fclamp.s index bd52cba1d31fc..8512d6077699d 100644 --- a/llvm/test/MC/AArch64/SVE2p1/fclamp.s +++ b/llvm/test/MC/AArch64/SVE2p1/fclamp.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck 
%s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/fdot.s b/llvm/test/MC/AArch64/SVE2p1/fdot.s index 3677adc444fe2..9005e1f7f0eb2 100644 --- a/llvm/test/MC/AArch64/SVE2p1/fdot.s +++ b/llvm/test/MC/AArch64/SVE2p1/fdot.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1b.s b/llvm/test/MC/AArch64/SVE2p1/ld1b.s index 2692e1684af11..7e4b9ff9ee7d7 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1b.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1b.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 
's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1d.s b/llvm/test/MC/AArch64/SVE2p1/ld1d.s index 2edfea2a72b8e..387e10ee283a8 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1d.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1d.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1d_q-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ld1d_q-diagnostics.s new file mode 100644 index 0000000000000..7860d9a610d01 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1d_q-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +ld1d {z0.q}, p8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ld1d {z0.q}, p8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ld1d {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without 
element suffix) +// CHECK-NEXT: ld1d {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ld1d {z0.q}, p0/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: ld1d {z0.q}, p0/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z3.q}, p0/z, [x0, #8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: ld1d {z3.q}, p0/z, [x0, #8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1d_q.s b/llvm/test/MC/AArch64/SVE2p1/ld1d_q.s new file mode 100644 index 0000000000000..12ecde8dc80f8 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1d_q.s @@ -0,0 +1,73 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ld1d {z0.q}, p0/z, [x0, x0, lsl #3] // 10100101-10000000-10000000-00000000 +// CHECK-INST: ld1d { z0.q }, p0/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0x80,0x80,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5808000 + +ld1d {z21.q}, p5/z, [x10, x21, lsl 
#3] // 10100101-10010101-10010101-01010101 +// CHECK-INST: ld1d { z21.q }, p5/z, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0x95,0x95,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5959555 + +ld1d {z23.q}, p3/z, [x13, x8, lsl #3] // 10100101-10001000-10001101-10110111 +// CHECK-INST: ld1d { z23.q }, p3/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x8d,0x88,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5888db7 + +ld1d z23.q, p3/z, [x13, x8, lsl #3] // 10100101-10001000-10001101-10110111 +// CHECK-INST: ld1d { z23.q }, p3/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x8d,0x88,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5888db7 + +ld1d {z0.q}, p0/z, [x0] // 10100101-10010000-00100000-00000000 +// CHECK-INST: ld1d { z0.q }, p0/z, [x0] +// CHECK-ENCODING: [0x00,0x20,0x90,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5902000 + +ld1d z0.q, p0/z, [x0] // 10100101-10010000-00100000-00000000 +// CHECK-INST: ld1d { z0.q }, p0/z, [x0] +// CHECK-ENCODING: [0x00,0x20,0x90,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5902000 + +ld1d {z21.q}, p5/z, [x10, #5, mul vl] // 10100101-10010101-00110101-01010101 +// CHECK-INST: ld1d { z21.q }, p5/z, [x10, #5, mul vl] +// CHECK-ENCODING: [0x55,0x35,0x95,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5953555 + +ld1d {z23.q}, p3/z, [x13, #-8, mul vl] // 10100101-10011000-00101101-10110111 +// CHECK-INST: ld1d { z23.q }, p3/z, [x13, #-8, mul vl] +// CHECK-ENCODING: [0xb7,0x2d,0x98,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5982db7 + +ld1d {z31.q}, p7/z, [sp, #-1, mul vl] // 10100101-10011111-00111111-11111111 +// CHECK-INST: ld1d { z31.q }, p7/z, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x9f,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a59f3fff + +ld1d z31.q, p7/z, [sp, #-1, mul vl] // 10100101-10011111-00111111-11111111 
+// CHECK-INST: ld1d { z31.q }, p7/z, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x9f,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a59f3fff diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1h.s b/llvm/test/MC/AArch64/SVE2p1/ld1h.s index 0146fb11caa08..833c940af5ef8 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1h.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1h.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1q.s b/llvm/test/MC/AArch64/SVE2p1/ld1q.s index dc6e904d0fa0a..2adc657497d58 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1q.s +++ b/llvm/test/MC/AArch64/SVE2p1/ld1q.s @@ -5,7 +5,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1w.s b/llvm/test/MC/AArch64/SVE2p1/ld1w.s index 2849ee106ba00..177b0a88be86b 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ld1w.s +++ 
b/llvm/test/MC/AArch64/SVE2p1/ld1w.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1w_q-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ld1w_q-diagnostics.s new file mode 100644 index 0000000000000..f049add93eb75 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1w_q-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +ld1w {z0.q}, p8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ld1w {z0.q}, p8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ld1w {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ld1w {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ld1w {z0.q}, p0/z, [x0, #-9, mul vl] +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: ld1w {z0.q}, p0/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z3.q}, p0/z, [x0, #8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: ld1w {z3.q}, p0/z, [x0, #8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1w_q.s b/llvm/test/MC/AArch64/SVE2p1/ld1w_q.s new file mode 100644 index 0000000000000..9450ac5b0fe92 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1w_q.s @@ -0,0 +1,62 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ld1w {z0.q}, p0/z, [x0, x0, lsl #2] // 10100101-00000000-10000000-00000000 +// CHECK-INST: ld1w { z0.q }, p0/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0x80,0x00,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5008000 + +ld1w {z21.q}, p5/z, [x10, x21, lsl #2] // 10100101-00010101-10010101-01010101 +// CHECK-INST: ld1w { z21.q }, p5/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0x95,0x15,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5159555 + +ld1w {z23.q}, p3/z, [x13, x8, lsl #2] // 
10100101-00001000-10001101-10110111 +// CHECK-INST: ld1w { z23.q }, p3/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x8d,0x08,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5088db7 + +ld1w z23.q, p3/z, [x13, x8, lsl #2] // 10100101-00001000-10001101-10110111 +// CHECK-INST: ld1w { z23.q }, p3/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x8d,0x08,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5088db7 + +ld1w {z0.q}, p0/z, [x0] // 10100101-00010000-00100000-00000000 +// CHECK-INST: ld1w { z0.q }, p0/z, [x0] +// CHECK-ENCODING: [0x00,0x20,0x10,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5102000 + +ld1w {z21.q}, p5/z, [x10, #5, mul vl] // 10100101-00010101-00110101-01010101 +// CHECK-INST: ld1w { z21.q }, p5/z, [x10, #5, mul vl] +// CHECK-ENCODING: [0x55,0x35,0x15,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5153555 + +ld1w {z23.q}, p3/z, [x13, #-8, mul vl] // 10100101-00011000-00101101-10110111 +// CHECK-INST: ld1w { z23.q }, p3/z, [x13, #-8, mul vl] +// CHECK-ENCODING: [0xb7,0x2d,0x18,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a5182db7 + +ld1w {z31.q}, p7/z, [sp, #-1, mul vl] // 10100101-00011111-00111111-11111111 +// CHECK-INST: ld1w { z31.q }, p7/z, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x1f,0xa5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: a51f3fff + diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s index 4389f84bba6a9..e582b94730e60 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | 
llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s index b459309405b5a..7ee3a20c1209d 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s index 7027efeb2442c..acaca8f4d4f98 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s 
b/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s index 662d2c54ec3ab..fffc1e81750f8 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/pext.s b/llvm/test/MC/AArch64/SVE2p1/pext.s index cefade005a6ca..86a79316a1e6c 100644 --- a/llvm/test/MC/AArch64/SVE2p1/pext.s +++ b/llvm/test/MC/AArch64/SVE2p1/pext.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/ptrue.s b/llvm/test/MC/AArch64/SVE2p1/ptrue.s index ae2a3456ff3cb..c499460ea9a13 100644 --- a/llvm/test/MC/AArch64/SVE2p1/ptrue.s +++ b/llvm/test/MC/AArch64/SVE2p1/ptrue.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 
-filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sdot.s b/llvm/test/MC/AArch64/SVE2p1/sdot.s index 3f5517ae7f26a..13d4e2d08cf61 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sdot.s +++ b/llvm/test/MC/AArch64/SVE2p1/sdot.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sqcvtn.s b/llvm/test/MC/AArch64/SVE2p1/sqcvtn.s index 5bb66364d4907..b50e2ff47afad 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sqcvtn.s +++ b/llvm/test/MC/AArch64/SVE2p1/sqcvtn.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble 
-show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sqcvtun.s b/llvm/test/MC/AArch64/SVE2p1/sqcvtun.s index 573da67149986..b17e6a4757788 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sqcvtun.s +++ b/llvm/test/MC/AArch64/SVE2p1/sqcvtun.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sqrshrn.s b/llvm/test/MC/AArch64/SVE2p1/sqrshrn.s index 1ee4e7aa75da9..4ae7bd32b1b69 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sqrshrn.s +++ b/llvm/test/MC/AArch64/SVE2p1/sqrshrn.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/sqrshrun.s b/llvm/test/MC/AArch64/SVE2p1/sqrshrun.s index ba47e02e297ff..c88b430b18e92 100644 --- a/llvm/test/MC/AArch64/SVE2p1/sqrshrun.s +++ b/llvm/test/MC/AArch64/SVE2p1/sqrshrun.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump 
-d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1b.s b/llvm/test/MC/AArch64/SVE2p1/st1b.s index eeadf4799fe90..9293d1fbe272c 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1b.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1b.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1d.s b/llvm/test/MC/AArch64/SVE2p1/st1d.s index 4b9451e92b85b..367bc7d43e6b1 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1d.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1d.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1d_q-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/st1d_q-diagnostics.s new file mode 100644 index 0000000000000..4ad52197095dc --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1d_q-diagnostics.s @@ -0,0 +1,33 @@ +-26 +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +st1d {z0.q}, p8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: st1d {z0.q}, p8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: st1d {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: st1d {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +st1d {z0.q}, p0, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: st1d {z0.q}, p0, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z3.q}, p0, [x0, #8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. 
+// CHECK-NEXT: st1d {z3.q}, p0, [x0, #8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/st1d_q.s b/llvm/test/MC/AArch64/SVE2p1/st1d_q.s new file mode 100644 index 0000000000000..52d1f1635bf34 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1d_q.s @@ -0,0 +1,74 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1d {z0.q}, p0, [x0, x0, lsl #3] // 11100101-11000000-01000000-00000000 +// CHECK-INST: st1d { z0.q }, p0, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0x40,0xc0,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c04000 + +st1d {z21.q}, p5, [x10, x21, lsl #3] // 11100101-11010101-01010101-01010101 +// CHECK-INST: st1d { z21.q }, p5, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0x55,0xd5,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5d55555 + +st1d {z23.q}, p3, [x13, x8, lsl #3] // 11100101-11001000-01001101-10110111 +// CHECK-INST: st1d { z23.q }, p3, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x4d,0xc8,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c84db7 + +st1d z23.q, p3, [x13, x8, lsl #3] // 11100101-11001000-01001101-10110111 +// CHECK-INST: st1d { z23.q }, 
p3, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x4d,0xc8,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c84db7 + +st1d {z0.q}, p0, [x0] // 11100101-11000000-11100000-00000000 +// CHECK-INST: st1d { z0.q }, p0, [x0] +// CHECK-ENCODING: [0x00,0xe0,0xc0,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c0e000 + +st1d z0.q, p0, [x0] // 11100101-11000000-11100000-00000000 +// CHECK-INST: st1d { z0.q }, p0, [x0] +// CHECK-ENCODING: [0x00,0xe0,0xc0,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c0e000 + +st1d {z21.q}, p5, [x10, #5, mul vl] // 11100101-11000101-11110101-01010101 +// CHECK-INST: st1d { z21.q }, p5, [x10, #5, mul vl] +// CHECK-ENCODING: [0x55,0xf5,0xc5,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c5f555 + +st1d {z23.q}, p3, [x13, #-8, mul vl] // 11100101-11001000-11101101-10110111 +// CHECK-INST: st1d { z23.q }, p3, [x13, #-8, mul vl] +// CHECK-ENCODING: [0xb7,0xed,0xc8,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5c8edb7 + +st1d {z31.q}, p7, [sp, #-1, mul vl] // 11100101-11001111-11111111-11111111 +// CHECK-INST: st1d { z31.q }, p7, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0xff,0xcf,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5cfffff + +st1d z31.q, p7, [sp, #-1, mul vl] // 11100101-11001111-11111111-11111111 +// CHECK-INST: st1d { z31.q }, p7, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0xff,0xcf,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5cfffff + diff --git a/llvm/test/MC/AArch64/SVE2p1/st1h.s b/llvm/test/MC/AArch64/SVE2p1/st1h.s index 2f855cb4688ff..d81900856197a 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1h.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1h.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 
-filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1q.s b/llvm/test/MC/AArch64/SVE2p1/st1q.s index 313e7d7e0f17e..5bf1892339699 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1q.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1q.s @@ -5,7 +5,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ // RUN: | llvm-objdump -d --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1w.s b/llvm/test/MC/AArch64/SVE2p1/st1w.s index 33703969bc023..374b03da30c2f 100644 --- a/llvm/test/MC/AArch64/SVE2p1/st1w.s +++ b/llvm/test/MC/AArch64/SVE2p1/st1w.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 
-mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/st1w_q-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/st1w_q-diagnostics.s new file mode 100644 index 0000000000000..d337e62666360 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1w_q-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate register + +st1w {z0.q}, p8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: st1w {z0.q}, p8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: st1w {z23.q}, p2/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: st1w {z23.q}, p2.q, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +st1w {z0.q}, p0, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. +// CHECK-NEXT: st1w {z0.q}, p0, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z3.q}, p0, [x0, #8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be an integer in range [-8, 7]. 
+// CHECK-NEXT: st1w {z3.q}, p0, [x0, #8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/st1w_q.s b/llvm/test/MC/AArch64/SVE2p1/st1w_q.s new file mode 100644 index 0000000000000..efb682692224e --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1w_q.s @@ -0,0 +1,74 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1w {z0.q}, p0, [x0, x0, lsl #2] // 11100101-00000000-01000000-00000000 +// CHECK-INST: st1w { z0.q }, p0, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0x40,0x00,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5004000 + +st1w {z21.q}, p5, [x10, x21, lsl #2] // 11100101-00010101-01010101-01010101 +// CHECK-INST: st1w { z21.q }, p5, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0x55,0x15,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5155555 + +st1w {z23.q}, p3, [x13, x8, lsl #2] // 11100101-00001000-01001101-10110111 +// CHECK-INST: st1w { z23.q }, p3, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x4d,0x08,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5084db7 + +st1w z23.q, p3, [x13, x8, lsl #2] // 11100101-00001000-01001101-10110111 +// CHECK-INST: st1w { z23.q }, 
p3, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x4d,0x08,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e5084db7 + +st1w {z0.q}, p0, [x0] // 11100101-00000000-11100000-00000000 +// CHECK-INST: st1w { z0.q }, p0, [x0] +// CHECK-ENCODING: [0x00,0xe0,0x00,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e500e000 + +st1w z0.q, p0, [x0] // 11100101-00000000-11100000-00000000 +// CHECK-INST: st1w { z0.q }, p0, [x0] +// CHECK-ENCODING: [0x00,0xe0,0x00,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e500e000 + +st1w {z21.q}, p5, [x10, #5, mul vl] // 11100101-00000101-11110101-01010101 +// CHECK-INST: st1w { z21.q }, p5, [x10, #5, mul vl] +// CHECK-ENCODING: [0x55,0xf5,0x05,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e505f555 + +st1w {z23.q}, p3, [x13, #-8, mul vl] // 11100101-00001000-11101101-10110111 +// CHECK-INST: st1w { z23.q }, p3, [x13, #-8, mul vl] +// CHECK-ENCODING: [0xb7,0xed,0x08,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e508edb7 + +st1w {z31.q}, p7, [sp, #-1, mul vl] // 11100101-00001111-11111111-11111111 +// CHECK-INST: st1w { z31.q }, p7, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0xff,0x0f,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e50fffff + +st1w z31.q, p7, [sp, #-1, mul vl] // 11100101-00001111-11111111-11111111 +// CHECK-INST: st1w { z31.q }, p7, [sp, #-1, mul vl] +// CHECK-ENCODING: [0xff,0xff,0x0f,0xe5] +// CHECK-ERROR: instruction requires: sve2p1 +// CHECK-UNKNOWN: e50fffff + diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1b.s b/llvm/test/MC/AArch64/SVE2p1/stnt1b.s index 7b03e20279a51..7f0ec3160d99d 100644 --- a/llvm/test/MC/AArch64/SVE2p1/stnt1b.s +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1b.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc 
-triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1d.s b/llvm/test/MC/AArch64/SVE2p1/stnt1d.s index 6b0215d943665..af87f2c388afb 100644 --- a/llvm/test/MC/AArch64/SVE2p1/stnt1d.s +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1d.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1h.s b/llvm/test/MC/AArch64/SVE2p1/stnt1h.s index 954494c6cc330..433a3fdeea9b3 100644 --- a/llvm/test/MC/AArch64/SVE2p1/stnt1h.s +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1h.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' 
\ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1w.s b/llvm/test/MC/AArch64/SVE2p1/stnt1w.s index fba4873f8c720..f9836869eb6c8 100644 --- a/llvm/test/MC/AArch64/SVE2p1/stnt1w.s +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1w.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/udot.s b/llvm/test/MC/AArch64/SVE2p1/udot.s index c88cc631543a1..2c3628b5dbc56 100644 --- a/llvm/test/MC/AArch64/SVE2p1/udot.s +++ b/llvm/test/MC/AArch64/SVE2p1/udot.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/uqcvtn.s b/llvm/test/MC/AArch64/SVE2p1/uqcvtn.s index 68be7a1670703..701fee430dc65 100644 --- a/llvm/test/MC/AArch64/SVE2p1/uqcvtn.s +++ b/llvm/test/MC/AArch64/SVE2p1/uqcvtn.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj 
-mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/uqrshrn.s b/llvm/test/MC/AArch64/SVE2p1/uqrshrn.s index b68223edc5169..e8d2e1239bbda 100644 --- a/llvm/test/MC/AArch64/SVE2p1/uqrshrn.s +++ b/llvm/test/MC/AArch64/SVE2p1/uqrshrn.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --no-print-imm-hex --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilege.s b/llvm/test/MC/AArch64/SVE2p1/whilege.s index f1acef11f69c6..1b6f09087ef23 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilege.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilege.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: 
llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilegt.s b/llvm/test/MC/AArch64/SVE2p1/whilegt.s index 69e3bc93c0c7a..e7c2badb443d4 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilegt.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilegt.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilehi.s b/llvm/test/MC/AArch64/SVE2p1/whilehi.s index ddb7a6829c920..2c7b7f75be7b4 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilehi.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilehi.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilehs.s b/llvm/test/MC/AArch64/SVE2p1/whilehs.s index 0a4c50da6d4c1..f199c561d4c56 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilehs.s 
+++ b/llvm/test/MC/AArch64/SVE2p1/whilehs.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilele.s b/llvm/test/MC/AArch64/SVE2p1/whilele.s index f1824ecc8a411..44e8aab57c7c6 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilele.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilele.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilelo.s b/llvm/test/MC/AArch64/SVE2p1/whilelo.s index 5be4b66fc8c09..9d4a8442fdfc5 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilelo.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilelo.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | 
llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilels.s b/llvm/test/MC/AArch64/SVE2p1/whilels.s index 89d98517cd498..9412373a8580f 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilels.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilels.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AArch64/SVE2p1/whilelt.s b/llvm/test/MC/AArch64/SVE2p1/whilelt.s index dd0d3b77f00ae..d0d02d0fc351a 100644 --- a/llvm/test/MC/AArch64/SVE2p1/whilelt.s +++ b/llvm/test/MC/AArch64/SVE2p1/whilelt.s @@ -7,7 +7,7 @@ // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ // RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST // RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ -// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: | llvm-objdump -d --mattr=-sme2,-sve2p1 - | FileCheck %s --check-prefix=CHECK-UNKNOWN // RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ // RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ // RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ diff --git a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s 
b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s index d369973d56dd4..b05bab15e2008 100644 --- a/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx10_asm_vop3.s @@ -12797,6 +12797,9 @@ v_permlane16_b32 v5, v1, 0.5, s3 v_permlane16_b32 v5, v1, -4.0, s3 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xef,0x0d,0x00] +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + v_permlane16_b32 v5, v1, s2, s103 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x9c,0x01] @@ -12830,6 +12833,12 @@ v_permlane16_b32 v5, v1, s2, 0.5 v_permlane16_b32 v5, v1, s2, -4.0 // GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xdc,0x03] +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlane16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX10: encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_permlane16_b32 v5, v1, s2, s3 op_sel:[1,0] // GFX10: encoding: [0x05,0x08,0x77,0xd7,0x01,0x05,0x0c,0x00] @@ -12923,6 +12932,9 @@ v_permlanex16_b32 v5, v1, 0.5, s3 v_permlanex16_b32 v5, v1, -4.0, s3 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xef,0x0d,0x00] +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] + v_permlanex16_b32 v5, v1, s2, s103 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x9c,0x01] @@ -12956,6 +12968,12 @@ v_permlanex16_b32 v5, v1, s2, 0.5 v_permlanex16_b32 v5, v1, s2, -4.0 // GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xdc,0x03] +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] + +v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX10: encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] + v_permlanex16_b32 v5, v1, s2, s3 op_sel:[1,0] // GFX10: encoding: [0x05,0x08,0x78,0xd7,0x01,0x05,0x0c,0x00] diff --git 
a/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s new file mode 100644 index 0000000000000..415f7348c9ee6 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vinterp_err.s @@ -0,0 +1,42 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck %s -check-prefix=GFX11-ERR --implicit-check-not=error: --strict-whitespace + +//===----------------------------------------------------------------------===// +// VINTERP src operands must be VGPRs. +// Check that other operand kinds are rejected by assembler. +//===----------------------------------------------------------------------===// + +v_interp_p10_f32 v0, s1, v2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f32 v0, v1, s2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f32 v0, v1, v2, s3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f32 v0, 1, v2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f32 v0, v1, 2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f32 v0, v1, v2, 3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, s1, v2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, v1, s2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p10_f16_f32 v0, v1, v2, s3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0, 1, v2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0, v1, 2, v3 +// GFX11-ERR: error: invalid operand for instruction + +v_interp_p2_f16_f32 v0, v1, v2, 3 +// GFX11-ERR: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index 3df2843deade6..991ef34807e85 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -48,113 +48,6 @@ v_add3_u32 v5, src_scc, vcc_lo, -1 
v_add3_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_add_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, v255, src_scc, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0xff,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, s105, s105, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x6a,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x6b,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, m0, 0.5, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x7d,0xe0,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 -// W32: encoding: [0x05,0x06,0x20,0xd5,0x7f,0x82,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s105, null, exec_hi, s105 -// W32: encoding: [0x05,0x69,0x20,0xd5,0x7c,0xfe,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_add_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo -// W32: encoding: [0x05,0x6a,0x20,0xd5,0xc1,0xfa,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi -// W32: encoding: [0x05,0x6b,0x20,0xd5,0xf0,0xd4,0xac,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 -// W32: encoding: [0x05,0x7b,0x20,0xd5,0xfd,0xf8,0xec,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 
v5, s[12:13], exec_lo, exec_lo, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] -// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] -// W64: encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc -// W64: encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] -// W64: encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - v_add_co_u32 v5, s6, v1, v2 // W32: encoding: [0x05,0x06,0x00,0xd7,0x01,0x05,0x02,0x00] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -270,96 +163,6 @@ v_add_co_u32 v5, ttmp[14:15], src_scc, vcc_lo v_add_co_u32 v255, null, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_add_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] - -v_add_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00] - -v_add_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00] - -v_add_f16_e64 v5, s105, s105 -// GFX11: encoding: 
[0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00] - -v_add_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00] - -v_add_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_add_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00] - -v_add_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00] - -v_add_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00] - -v_add_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00] - -v_add_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00] - -v_add_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00] - -v_add_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48] - -v_add_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30] - -v_add_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_add_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] - -v_add_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00] - -v_add_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00] - -v_add_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00] - -v_add_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00] - -v_add_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_add_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00] - -v_add_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00] - -v_add_f32_e64 v5, 
exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00] - -v_add_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00] - -v_add_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00] - -v_add_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00] - -v_add_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48] - -v_add_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30] - -v_add_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_add_f64 v[5:6], v[1:2], v[2:3] // GFX11: encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00] @@ -576,51 +379,6 @@ v_add_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp // GFX11: encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_add_nc_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00] - -v_add_nc_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00] - -v_add_nc_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00] - -v_add_nc_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00] - -v_add_nc_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00] - -v_add_nc_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_add_nc_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00] - -v_add_nc_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00] - -v_add_nc_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00] - -v_add_nc_u32_e64 v5, exec_hi, null -// GFX11: encoding: 
[0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00] - -v_add_nc_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00] - -v_add_nc_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00] - -v_add_nc_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00] - -v_add_nc_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00] - -v_add_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_alignbit_b32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] @@ -756,51 +514,6 @@ v_and_b16 v5, src_scc, vcc_lo v_and_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_and_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00] - -v_and_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00] - -v_and_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00] - -v_and_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00] - -v_and_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00] - -v_and_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_and_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00] - -v_and_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00] - -v_and_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00] - -v_and_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00] - -v_and_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00] - -v_and_b32_e64 v5, -1, exec_hi -// GFX11: encoding: 
[0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00] - -v_and_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00] - -v_and_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00] - -v_and_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_and_or_b32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] @@ -891,51 +604,6 @@ v_ashrrev_i16 v5, src_scc, vcc_lo v_ashrrev_i16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_ashrrev_i32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] - -v_ashrrev_i32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00] - -v_ashrrev_i32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00] - -v_ashrrev_i32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00] - -v_ashrrev_i32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00] - -v_ashrrev_i32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_ashrrev_i32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00] - -v_ashrrev_i32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00] - -v_ashrrev_i32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00] - -v_ashrrev_i32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00] - -v_ashrrev_i32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00] - -v_ashrrev_i32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00] - -v_ashrrev_i32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00] - -v_ashrrev_i32_e64 v5, src_scc, vcc_lo -// GFX11: 
encoding: [0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00] - -v_ashrrev_i32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_ashrrev_i64 v[5:6], v1, vcc // GFX11: encoding: [0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] @@ -1188,267 +856,6 @@ v_bfm_b32 v5, src_scc, vcc_lo v_bfm_b32 v255, 0xaf123456, vcc_hi // GFX11: encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_bfrev_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00] - -v_bfrev_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00] - -v_bfrev_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00] - -v_bfrev_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00] - -v_bfrev_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_ceil_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00] - -v_ceil_f16_e64 v5, v255 -// GFX11: encoding: 
[0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00] - -v_ceil_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00] - -v_ceil_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08] - -v_ceil_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10] - -v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_ceil_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00] - -v_ceil_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00] - -v_ceil_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00] - 
-v_ceil_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00] - -v_ceil_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08] - -v_ceil_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10] - -v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_ceil_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00] - -v_ceil_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00] - -v_ceil_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00] - -v_ceil_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08] - -v_ceil_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30] - -v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_cls_i32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] - -v_cls_i32_e64 
v5, v255 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] - -v_cls_i32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] - -v_cls_i32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] - -v_cls_i32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] - -v_cls_i32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] - -v_cls_i32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] - -v_cls_i32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] - -v_cls_i32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] - -v_cls_i32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] - -v_cls_i32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] - -v_cls_i32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] - -v_cls_i32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] - -v_cls_i32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] - -v_cls_i32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_clz_i32_u32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] - -v_clz_i32_u32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] - -v_clz_i32_u32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, m0 -// GFX11: encoding: 
[0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] - -v_clz_i32_u32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - v_cndmask_b16 v5, v1, src_scc, s3 // W32: encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x0d,0x00] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1548,286 +955,44 @@ v_cndmask_b16 v5, -|src_scc|, null, ttmp[14:15] v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null // GFX11: encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cndmask_b32_e64 v5, v1, 0xaf123456, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, v255, src_scc, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] -v_cndmask_b32_e64 v5, s105, s105, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0xff,0x05,0xa4,0x01] -v_cndmask_b32_e64 v5, vcc_lo, v2, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction +v_cubeid_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0xfe,0xff,0x01] -v_cndmask_b32_e64 v5, vcc_hi, v255, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x69,0xd2,0xf8,0x01] -v_cndmask_b32_e64 v5, ttmp15, ttmp15, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x6a,0xf6,0x0c,0x04] -v_cndmask_b32_e64 v5, m0, 0.5, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cndmask_b32_e64 v5, exec_lo, exec_lo, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x0c,0xd6,0x7b,0xfa,0xed,0xe1] -v_cndmask_b32_e64 v5, exec_hi, -1, s3 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x7d,0xe0,0xf5,0x01] -v_cndmask_b32_e64 v5, null, exec_hi, s105 -// W32: encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x0c,0xd6,0x7e,0x82,0xad,0x01] -v_cndmask_b32_e64 v5, -1, m0, vcc_lo -// W32: encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction +v_cubeid_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x0c,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc_hi -// W32: encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xac,0x41] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x0c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp15 -// W32: encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xec,0x21] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x0c,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cndmask_b32_e64 v5, v1, 0xaf123456, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, v255, src_scc, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, s105, s105, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, vcc_lo, v2, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, vcc_hi, v255, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, ttmp15, ttmp15, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, m0, 0.5, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction - -v_cndmask_b32_e64 v5, exec_lo, exec_lo, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, exec_hi, -1, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, null, exec_hi, s[6:7] -// W64: encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, -1, m0, s[104:105] -// W64: encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc -// W64: encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp[14:15] -// W64: encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64 v255, -|0xaf123456|, -|vcc_hi|, null -// GFX11: encoding: [0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf] - -v_cos_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] - -v_cos_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00] - -v_cos_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00] - -v_cos_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00] - -v_cos_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00] - -v_cos_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00] - -v_cos_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00] - -v_cos_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00] 
- -v_cos_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00] - -v_cos_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00] - -v_cos_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00] - -v_cos_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00] - -v_cos_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08] - -v_cos_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10] - -v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_cos_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00] - -v_cos_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00] - -v_cos_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00] - -v_cos_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00] - -v_cos_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00] - -v_cos_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00] - -v_cos_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00] - -v_cos_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00] - -v_cos_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00] - -v_cos_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00] - -v_cos_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00] - -v_cos_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00] - -v_cos_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08] - -v_cos_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10] - -v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 -// 
GFX11: encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_ctz_i32_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] - -v_ctz_i32_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] - -v_ctz_i32_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] - -v_ctz_i32_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_cubeid_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] - -v_cubeid_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0xff,0x05,0xa4,0x01] - -v_cubeid_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x01,0xfe,0xff,0x01] - -v_cubeid_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x69,0xd2,0xf8,0x01] - -v_cubeid_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: 
[0x05,0x00,0x0c,0xd6,0x6a,0xf6,0x0c,0x04] - -v_cubeid_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_cubeid_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x0c,0xd6,0x7b,0xfa,0xed,0xe1] - -v_cubeid_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0x7d,0xe0,0xf5,0x01] - -v_cubeid_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x0c,0xd6,0x7e,0x82,0xad,0x01] - -v_cubeid_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x0c,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_cubeid_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x0c,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_cubeid_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x0c,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_cubeid_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0xf0,0xfa,0xc0,0x4b] +v_cubeid_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd6,0xf0,0xfa,0xc0,0x4b] v_cubeid_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 // GFX11: encoding: [0x05,0x02,0x0c,0xd6,0xfd,0xd4,0x04,0x33] @@ -1970,3566 +1135,1244 @@ v_cubetc_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 // GFX11: encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_cvt_f16_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_f16_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_i16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f16_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f16_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, s1, s2 +// GFX11: encoding: 
[0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f16_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f16_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f16_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f16_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f16_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f16_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f16_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f16_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_i16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f16_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_i16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f16_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_i16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f16_f32_e64 
v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f16_i16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_f16_i16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_i16_i32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f16_i16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f16_i16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f16_i16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f16_i16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f16_i16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f16_i16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f16_i16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f16_i16_e64 v5, exec_hi 
-// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f16_i16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f16_i16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_i16_i32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f16_i16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x00,0x00,0x08,0x00,0x38,0x00,0x00] +v_cvt_pk_i16_i32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f16_i16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_i16_i32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] -v_cvt_f16_i16_e64 v255, 0xfe0b clamp div:2 -// GFX11: encoding: [0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] +v_cvt_pk_i16_i32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] -v_cvt_f16_u16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f16_u16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f16_u16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f16_u16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f16_u16_e64 v5, vcc_lo -// GFX11: 
encoding: [0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f16_u16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f16_u16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_f16_u16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f16_u16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f16_u16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f16_u16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f16_u16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f16_u16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x00,0x00,0x08,0x00,0x38,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f16_u16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f16_u16_e64 v255, 
0xfe0b clamp div:2 -// GFX11: encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] +v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_f32_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_f32_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, m0, 0.5 +// GFX11: encoding: 
[0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_norm_u16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_f64_e64 v5, v[1:2] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_f32_f64_e64 v5, v[254:255] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00] +v_cvt_pk_u16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_f64_e64 v5, s[2:3] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_f64_e64 v5, s[104:105] -// GFX11: encoding: 
[0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_f64_e64 v5, vcc -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_f64_e64 v5, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_f64_e64 v5, exec -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_f64_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_f64_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_f64_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_u16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_f64_e64 v5, -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30] +v_cvt_pk_u16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_f64_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pk_u16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_i32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_u16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_i32_e64 v5, v255 -// GFX11: encoding: 
[0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_u16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_i32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_i32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_f32_i32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_i32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_i32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_i32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_i32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_i32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_i32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_i32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00] 
+v_cvt_pk_u16_u32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_i32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_u16_u32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_i32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_u16_u32 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_i32_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pk_u16_u32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_u32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_u16_u32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_u32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_u16_u32 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] -v_cvt_f32_u32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] -v_cvt_f32_u32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_u32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_f32_u32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_f32_u32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, s1, v255, 
exec_hi +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_f32_u32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_f32_u32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_f32_u32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_f32_u32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] -v_cvt_f32_u32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_f32_u32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_f32_u32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] -v_cvt_f32_u32_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte0_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] -v_cvt_f32_ubyte0_e64 v5, v255 -// GFX11: encoding: 
[0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] -v_cvt_f32_ubyte0_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] -v_cvt_f32_ubyte0_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null +// GFX11: encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte0_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_ubyte0_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_ubyte0_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_f32_ubyte0_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_ubyte0_e64 v5, -1 -// GFX11: 
encoding: [0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_ubyte0_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pknorm_i16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_ubyte0_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pknorm_i16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_ubyte0_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pknorm_i16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_ubyte1_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_ubyte1_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pknorm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_ubyte1_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, v255, v255 +// GFX11: encoding: 
[0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_ubyte1_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_ubyte1_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte1_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_ubyte1_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_ubyte1_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pknorm_i16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_ubyte1_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pknorm_i16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_ubyte1_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pknorm_i16_f32 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pknorm_i16_f32 
v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_ubyte2_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_ubyte2_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pknorm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte2_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] -v_cvt_f32_ubyte2_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_ubyte2_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -v_cvt_f32_ubyte2_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00] 
+v_cvt_pknorm_u16_f16 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_ubyte2_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_ubyte2_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pknorm_u16_f16 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_ubyte2_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pknorm_u16_f16 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_ubyte2_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pknorm_u16_f16 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f32_ubyte3_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] +// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f32_ubyte3_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pknorm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] +// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] 
-v_cvt_f32_ubyte3_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] -v_cvt_f32_ubyte3_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] -v_cvt_f32_ubyte3_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -v_cvt_f32_ubyte3_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] -v_cvt_f32_ubyte3_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] -v_cvt_f32_ubyte3_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08] +v_cvt_pknorm_u16_f32 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] -v_cvt_f32_ubyte3_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10] +v_cvt_pknorm_u16_f32 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] -v_cvt_f32_ubyte3_e64 v255, 0xaf123456 clamp div:2 -// GFX11: encoding: [0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_cvt_pknorm_u16_f32 v5, null, exec_lo +// GFX11: encoding: 
[0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] -v_cvt_f64_f32_e64 v[5:6], v1 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_f64_f32_e64 v[5:6], v255 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] -v_cvt_f64_f32_e64 v[5:6], s1 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] -v_cvt_f64_f32_e64 v[5:6], s105 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00] +v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| +// GFX11: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_cvt_f64_f32_e64 v[5:6], vcc_lo -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00] +v_div_fixup_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_f64_f32_e64 v[5:6], vcc_hi -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00] +v_div_fixup_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_f64_f32_e64 v[5:6], ttmp15 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00] +v_div_fixup_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_f64_f32_e64 v[5:6], m0 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00] +v_div_fixup_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_f64_f32_e64 v[5:6], exec_lo -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00] +v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_f64_f32_e64 v[5:6], exec_hi -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00] +v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: 
encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_cvt_f64_f32_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00] +v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_f64_f32_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00] +v_div_fixup_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_f64_f32_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08] +v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_f64_f32_e64 v[5:6], src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10] +v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_f64_f32_e64 v[254:255], -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_cvt_f64_i32_e64 v[5:6], v1 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00] +v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_f64_i32_e64 v[5:6], v255 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00] +v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] -v_cvt_f64_i32_e64 v[5:6], s1 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00] +v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] -v_cvt_f64_i32_e64 v[5:6], s105 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00] +v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null 
op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_cvt_f64_i32_e64 v[5:6], vcc_lo -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00] +v_div_fixup_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] -v_cvt_f64_i32_e64 v[5:6], vcc_hi -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00] +v_div_fixup_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] -v_cvt_f64_i32_e64 v[5:6], ttmp15 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00] +v_div_fixup_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] -v_cvt_f64_i32_e64 v[5:6], m0 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00] +v_div_fixup_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] -v_cvt_f64_i32_e64 v[5:6], exec_lo -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00] +v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] -v_cvt_f64_i32_e64 v[5:6], exec_hi -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00] +v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_cvt_f64_i32_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00] +v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] -v_cvt_f64_i32_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00] +v_div_fixup_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] -v_cvt_f64_i32_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08] +v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] -v_cvt_f64_i32_e64 v[5:6], src_scc mul:4 -// GFX11: encoding: 
[0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10] +v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] -v_cvt_f64_i32_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_cvt_f64_u32_e64 v[5:6], v1 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00] +v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] -v_cvt_f64_u32_e64 v[5:6], v255 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00] +v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] -v_cvt_f64_u32_e64 v[5:6], s1 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00] +v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] -v_cvt_f64_u32_e64 v[5:6], s105 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00] +v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_cvt_f64_u32_e64 v[5:6], vcc_lo -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] -v_cvt_f64_u32_e64 v[5:6], vcc_hi -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] -v_cvt_f64_u32_e64 v[5:6], ttmp15 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] -v_cvt_f64_u32_e64 v[5:6], m0 -// GFX11: encoding: 
[0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| +// GFX11: encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] -v_cvt_f64_u32_e64 v[5:6], exec_lo -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| +// GFX11: encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] -v_cvt_f64_u32_e64 v[5:6], exec_hi -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00] +v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null +// GFX11: encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -v_cvt_f64_u32_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_f64_u32_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_f64_u32_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08] - -v_cvt_f64_u32_e64 v[5:6], src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10] - -v_cvt_f64_u32_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_cvt_floor_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, exec_lo 
-// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] - -v_cvt_flr_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v5, src_scc -// 
GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] - -v_cvt_i16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_i16_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_i16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_i16_f16_e64 v255, -|0xfe0b| clamp -// GFX11: encoding: [0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] - -v_cvt_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00] - 
-v_cvt_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_i32_f32_e64 v255, -|0xaf123456| clamp -// GFX11: encoding: [0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] - -v_cvt_i32_f64_e64 v5, v[1:2] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_i32_f64_e64 v5, v[254:255] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00] - -v_cvt_i32_f64_e64 v5, s[2:3] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00] - -v_cvt_i32_f64_e64 v5, s[104:105] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00] - -v_cvt_i32_f64_e64 v5, vcc -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_i32_f64_e64 v5, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00] - -v_cvt_i32_f64_e64 v5, exec -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_i32_f64_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_i32_f64_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_i32_f64_e64 v5, 0.5 -// GFX11: encoding: 
[0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_i32_f64_e64 v5, -|src_scc| -// GFX11: encoding: [0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20] - -v_cvt_i32_f64_e64 v255, 0xaf123456 clamp -// GFX11: encoding: [0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_i32_i16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_i32_i16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_i32_i16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] - -v_cvt_i32_i16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_i32_i16_e64 v255, 0xfe0b -// GFX11: encoding: [0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, s1 -// GFX11: encoding: 
[0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] - -v_cvt_norm_i16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00] - 
-v_cvt_norm_i16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64 v255, -|0xfe0b| -// GFX11: encoding: [0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00] - 
-v_cvt_norm_u16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64 v255, -|0xfe0b| -// GFX11: encoding: [0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08] - -v_cvt_off_f32_i4_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10] - -v_cvt_off_f32_i4_e64 v255, 0x4f clamp div:2 -// GFX11: encoding: [0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00] - -v_cvt_pk_i16_f32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pk_i16_f32 v5, v255, v255 -// GFX11: encoding: 
[0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pk_i16_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pk_i16_f32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pk_i16_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pk_i16_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pk_i16_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pk_i16_f32 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pk_i16_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pk_i16_f32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pk_i16_f32 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] - -v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] - -v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| -// GFX11: encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] - -v_cvt_pk_i16_i32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pk_i16_i32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pk_i16_i32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pk_i16_i32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: 
[0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pk_i16_i32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pk_i16_i32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pk_i16_i32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pk_i16_i32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pk_i16_i32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pk_i16_i32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pk_i16_i32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] - -v_cvt_pk_i16_i32 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] - -v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pk_norm_i16_f16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pk_norm_i16_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pk_norm_i16_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pk_norm_i16_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pk_norm_i16_f16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null -// GFX11: 
encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pk_norm_i16_f16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pk_norm_i16_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] - -v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] - -v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] - -v_cvt_pk_norm_u16_f16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pk_norm_u16_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pk_norm_u16_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pk_norm_u16_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pk_norm_u16_f16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pk_norm_u16_f16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pk_norm_u16_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: 
[0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] - -v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] - -v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pk_rtz_f16_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] - -v_cvt_pk_rtz_f16_f32_e64 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] - -v_cvt_pk_rtz_f16_f32_e64 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] - -v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp -// GFX11: encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] - 
-v_cvt_pk_u16_f32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pk_u16_f32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pk_u16_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pk_u16_f32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pk_u16_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pk_u16_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pk_u16_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pk_u16_f32 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pk_u16_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pk_u16_f32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pk_u16_f32 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] - -v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] - -v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| -// GFX11: encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] - -v_cvt_pk_u16_u32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pk_u16_u32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pk_u16_u32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pk_u16_u32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 -// GFX11: encoding: 
[0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pk_u16_u32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pk_u16_u32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pk_u16_u32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pk_u16_u32 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pk_u16_u32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pk_u16_u32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pk_u16_u32 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] - -v_cvt_pk_u16_u32 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] - -v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pk_u8_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] - -v_cvt_pk_u8_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] - -v_cvt_pk_u8_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] - -v_cvt_pk_u8_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] - -v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] - -v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] - -v_cvt_pk_u8_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] - -v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi -// GFX11: 
encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] - -v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] - -v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] - -v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] - -v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] - -v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null -// GFX11: encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] - -v_cvt_pknorm_i16_f16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pknorm_i16_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pknorm_i16_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pknorm_i16_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pknorm_i16_f16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pknorm_i16_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_cvt_pknorm_i16_f16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pknorm_i16_f16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pknorm_i16_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pknorm_i16_f16 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pknorm_i16_f16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pknorm_i16_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] - 
-v_cvt_pknorm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] - -v_cvt_pknorm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] - -v_cvt_pknorm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] - -v_cvt_pknorm_i16_f32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pknorm_i16_f32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pknorm_i16_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pknorm_i16_f32 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pknorm_i16_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pknorm_i16_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pknorm_i16_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pknorm_i16_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pknorm_i16_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pknorm_i16_f32 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pknorm_i16_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pknorm_i16_f32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pknorm_i16_f32 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] - -v_cvt_pknorm_i16_f32 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] - -v_cvt_pknorm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| -// GFX11: encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] - 
-v_cvt_pknorm_u16_f16 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pknorm_u16_f16 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pknorm_u16_f16 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pknorm_u16_f16 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pknorm_u16_f16 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pknorm_u16_f16 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_cvt_pknorm_u16_f16 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pknorm_u16_f16 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pknorm_u16_f16 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pknorm_u16_f16 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pknorm_u16_f16 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pknorm_u16_f16 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pknorm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] -// GFX11: encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] - -v_cvt_pknorm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] -// GFX11: encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] - -v_cvt_pknorm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] -// GFX11: encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] - -v_cvt_pknorm_u16_f32 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] - -v_cvt_pknorm_u16_f32 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] - -v_cvt_pknorm_u16_f32 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] - -v_cvt_pknorm_u16_f32 v5, s105, s105 -// GFX11: 
encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] - -v_cvt_pknorm_u16_f32 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] - -v_cvt_pknorm_u16_f32 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pknorm_u16_f32 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] - -v_cvt_pknorm_u16_f32 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] - -v_cvt_pknorm_u16_f32 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] - -v_cvt_pknorm_u16_f32 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] - -v_cvt_pknorm_u16_f32 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] - -v_cvt_pknorm_u16_f32 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] - -v_cvt_pknorm_u16_f32 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] - -v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] - -v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| -// GFX11: encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] - -v_cvt_pkrtz_f16_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_pkrtz_f16_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: 
[0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] - -v_cvt_pkrtz_f16_f32_e64 v5, 0.5, -m0 -// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] - -v_cvt_pkrtz_f16_f32_e64 v5, -src_scc, |vcc_lo| -// GFX11: encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] - -v_cvt_pkrtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp -// GFX11: encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] - -v_cvt_rpi_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, null -// GFX11: encoding: 
[0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] - -v_cvt_u16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_u16_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_u16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_u16_f16_e64 v255, -|0xfe0b| clamp -// GFX11: encoding: [0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] - -v_cvt_u32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00] - 
-v_cvt_u32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_u32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_u32_f32_e64 v255, -|0xaf123456| clamp -// GFX11: encoding: [0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] - -v_cvt_u32_f64_e64 v5, v[1:2] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_u32_f64_e64 v5, v[254:255] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00] - -v_cvt_u32_f64_e64 v5, s[2:3] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00] - -v_cvt_u32_f64_e64 v5, s[104:105] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00] - -v_cvt_u32_f64_e64 v5, vcc -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_u32_f64_e64 v5, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00] - -v_cvt_u32_f64_e64 v5, exec -// GFX11: encoding: 
[0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_u32_f64_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_u32_f64_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_u32_f64_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00] - -v_cvt_u32_f64_e64 v5, -|src_scc| -// GFX11: encoding: [0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20] - -v_cvt_u32_f64_e64 v255, 0xaf123456 clamp -// GFX11: encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_cvt_u32_u16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] - -v_cvt_u32_u16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] - -v_cvt_u32_u16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] - -v_cvt_u32_u16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00] - -v_cvt_u32_u16_e64 v255, 0xfe0b -// GFX11: encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] - 
-v_div_fixup_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] - -v_div_fixup_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] - -v_div_fixup_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] - -v_div_fixup_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] - -v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] - -v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] - -v_div_fixup_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] - -v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] - -v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] - -v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] - -v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] - -v_div_fixup_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] - -v_div_fixup_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] - -v_div_fixup_f32 v5, s1, 
v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] - -v_div_fixup_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] - -v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] - -v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] - -v_div_fixup_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] - -v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] - -v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] - -v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] - -v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] - -v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] - -v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| -// GFX11: encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] - -v_div_fixup_f64 v[5:6], vcc, 
-|ttmp[14:15]|, -|ttmp[14:15]| -// GFX11: encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] - -v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null -// GFX11: encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] - -v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| -// GFX11: encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] - -v_div_fixup_f64 v[5:6], null, 0.5, vcc -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] - -v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 -// GFX11: encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] - -v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 -// GFX11: encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] - -v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 -// GFX11: encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] - -v_div_fmas_f32 v5, vcc_lo, v2, vcc_lo -// W32: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0xaa,0x01] - -v_div_fmas_f32 v5, ttmp15, ttmp15, ttmp15 -// W32: encoding: [0x05,0x00,0x37,0xd6,0x7b,0xf6,0xec,0x01] - -v_div_fmas_f32 v5, -|m0|, -|v255|, v3 -// W32: encoding: [0x05,0x03,0x37,0xd6,0x7d,0xfe,0x0f,0x64] - -v_div_fmas_f32 v5, -|exec_lo|, -|exec_lo|, -|exec_lo| -// W32: encoding: [0x05,0x07,0x37,0xd6,0x7e,0xfc,0xf8,0xe1] - -v_div_fmas_f32 v5, -|exec_hi|, 0.5, -|v255| -// W32: encoding: [0x05,0x05,0x37,0xd6,0x7f,0xe0,0xfd,0xa7] - -v_div_fmas_f32 v5, null, exec_hi, -|exec_hi| -// W32: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfe,0xfc,0x81] - -v_div_fmas_f32 v5, -1, -|m0|, -|m0| -// W32: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xfa,0xf4,0xc1] - -v_div_fmas_f32 v5, 0.5, -|vcc_lo|, 0.5 mul:2 -// W32: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd4,0xc0,0x4b] - -v_div_fmas_f32 v5, vcc_lo, v2, v3 -// W64: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] - -v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi -// W64: encoding: 
[0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] - -v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 -// W64: encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] - -v_div_fmas_f32 v5, m0, 0.5, v255 -// W64: encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] - -v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| -// W64: encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] - -v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| -// W64: encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] - -v_div_fmas_f32 v5, null, m0, -|m0| -// W64: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] - -v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo| -// W64: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] - -v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 -// W64: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] - -v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_div_fmas_f32 v5, v255, src_scc, src_scc -// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03] - -v_div_fmas_f32 v5, s105, s105, s105 -// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01] - -v_div_fmas_f32 v5, src_scc, -1, -1 mul:4 -// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13] - -v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2 -// GFX11: encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf] - -v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4] -// GFX11: encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04] - -v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105] -// GFX11: encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01] - -v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]| -// GFX11: encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7] - -v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]| -// GFX11: encoding: 
[0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1] - -v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null -// GFX11: encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61] - -v_div_fmas_f64 v[5:6], null, 0.5, -src_scc -// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83] - -v_div_fmas_f64 v[5:6], -1, -exec, |exec| -// GFX11: encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41] - -v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2 -// GFX11: encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9] - -v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4 -// GFX11: encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33] - -v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf] - -v_div_scale_f32 v5, vcc_lo, v1, v2, s3 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, v255, s2, s105 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15 -// W32: encoding: 
[0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456) -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4 -// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2 -// W32: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, v1, v2, s3 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, v255, s2, s105 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] -// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, s1, v255, exec_hi -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, s105, s105, exec_lo -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, m0, 0.5, m0 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456) -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4 -// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2 -// W64: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15] -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456 -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2 -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4 -// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2 -// W32: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105] -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15] -// W64: encoding: 
[0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456 -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2 -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4 -// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 -// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_dot2_bf16_bf16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] - -v_dot2_bf16_bf16 v5, v255, v255, s105 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] - -v_dot2_bf16_bf16 v5, s1, s2, v3 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] - -v_dot2_bf16_bf16 v5, s105, s105, m0 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] - -v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 -// 
GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] - -v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] - -v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] - -v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo -// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] - -v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| -// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] - -v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| -// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] - -v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| -// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] - -v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] - -v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] - -v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] - -v_dot2_f16_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] - -v_dot2_f16_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] - -v_dot2_f16_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] - -v_dot2_f16_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] - -v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] - -v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] - -v_dot2_f16_f16 v5, m0, 0.5, m0 -// GFX11: encoding: 
[0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] - -v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] - -v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| -// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] - -v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] - -v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] -// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] - -v_exp_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] - -v_exp_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00] - -v_exp_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00] - -v_exp_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00] - -v_exp_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00] - -v_exp_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00] - -v_exp_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00] - -v_exp_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00] - -v_exp_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00] - -v_exp_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00] - -v_exp_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00] - -v_exp_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00] - -v_exp_f16_e64 v5, 0.5 mul:2 
-// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08] - -v_exp_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10] - -v_exp_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_exp_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00] - -v_exp_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00] - -v_exp_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00] - -v_exp_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00] - -v_exp_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00] - -v_exp_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00] - -v_exp_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00] - -v_exp_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00] - -v_exp_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00] - -v_exp_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00] - -v_exp_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00] - -v_exp_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00] - -v_exp_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08] - -v_exp_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10] - -v_exp_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_ffbh_i32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] - -v_ffbh_i32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] - -v_ffbh_i32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, s105 -// GFX11: encoding: 
[0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] - -v_ffbh_i32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] - -v_ffbh_i32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_ffbh_u32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] - -v_ffbh_u32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] - -v_ffbh_u32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, 
null -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] - -v_ffbh_u32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] - -v_ffbh_u32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_ffbl_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] - -v_ffbl_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] - -v_ffbl_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] - -v_ffbl_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] - -v_ffbl_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_floor_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00] - -v_floor_f16_e64 v5, v255 -// GFX11: encoding: 
[0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00] - -v_floor_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00] - -v_floor_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00] - -v_floor_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00] - -v_floor_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00] - -v_floor_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00] - -v_floor_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00] - -v_floor_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00] - -v_floor_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00] - -v_floor_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00] - -v_floor_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00] - -v_floor_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08] - -v_floor_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10] - -v_floor_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_floor_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00] - -v_floor_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00] - -v_floor_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00] - -v_floor_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00] - -v_floor_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00] - -v_floor_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00] - -v_floor_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00] - -v_floor_f32_e64 v5, m0 -// GFX11: encoding: 
[0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00] - -v_floor_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00] - -v_floor_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00] - -v_floor_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00] - -v_floor_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00] - -v_floor_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08] - -v_floor_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10] - -v_floor_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_floor_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00] - -v_floor_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00] - -v_floor_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00] - -v_floor_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08] - -v_floor_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30] - -v_floor_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_fma_dx9_zero_f32 v5, v1, v2, s3 -// 
GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] - -v_fma_dx9_zero_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] - -v_fma_dx9_zero_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] - -v_fma_dx9_zero_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] - -v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] - -v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] - -v_fma_dx9_zero_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] - -v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] - -v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] - -v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_fma_f16 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] - -v_fma_f16 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] - -v_fma_f16 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] - 
-v_fma_f16 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] - -v_fma_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] - -v_fma_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] - -v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] - -v_fma_f16 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] - -v_fma_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] - -v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] - -v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] - -v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] - -v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] - -v_fma_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] - -v_fma_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01] - -v_fma_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01] - -v_fma_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01] - -v_fma_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04] - -v_fma_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: 
[0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1] - -v_fma_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01] - -v_fma_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01] - -v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_fma_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_fma_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_fma_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33] - -v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] - -v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7] -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00] - -v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255] -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07] - -v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| -// GFX11: encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1] - -v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| -// GFX11: encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1] - -v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null -// GFX11: encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] - -v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec| -// GFX11: encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1] - -v_fma_f64 v[5:6], null, 0.5, vcc -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01] - 
-v_fma_f64 v[5:6], -1, -1, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2 -// GFX11: encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b] - -v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 -// GFX11: encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73] - -v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 -// GFX11: encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] - -v_fma_legacy_f32 v5, v1, v2, s3 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] - -v_fma_legacy_f32 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] - -v_fma_legacy_f32 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] - -v_fma_legacy_f32 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] - -v_fma_legacy_f32 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] - -v_fma_legacy_f32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_fma_legacy_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] - -v_fma_legacy_f32 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] - -v_fma_legacy_f32 v5, |exec_lo|, -1, vcc_hi -// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] - -v_fma_legacy_f32 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] - -v_fma_legacy_f32 v5, null, exec_lo, -|0xaf123456| -// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] - -v_fma_legacy_f32 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] - -v_fma_legacy_f32 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] - -v_fma_legacy_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// 
GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] - -v_fma_legacy_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_fmac_dx9_zero_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] - -v_fmac_dx9_zero_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] - -v_fmac_dx9_zero_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] - -v_fmac_dx9_zero_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] - -v_fmac_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] - -v_fmac_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_fmac_dx9_zero_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] - -v_fmac_dx9_zero_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] +v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| +// GFX11: encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] -v_fmac_dx9_zero_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] +v_div_fixup_f64 v[5:6], null, 0.5, vcc +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] -v_fmac_dx9_zero_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] +v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_fmac_dx9_zero_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] +v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 +// GFX11: encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] -v_fmac_dx9_zero_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] +v_div_fixup_f64 v[5:6], -|src_scc|, 
-|exec|, 0.5 mul:4 +// GFX11: encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] -v_fmac_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] +v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] +v_div_fmas_f32 v5, vcc_lo, v2, vcc_lo +// W32: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0xaa,0x01] -v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +v_div_fmas_f32 v5, ttmp15, ttmp15, ttmp15 +// W32: encoding: [0x05,0x00,0x37,0xd6,0x7b,0xf6,0xec,0x01] -v_fmac_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] +v_div_fmas_f32 v5, -|m0|, -|v255|, v3 +// W32: encoding: [0x05,0x03,0x37,0xd6,0x7d,0xfe,0x0f,0x64] -v_fmac_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] +v_div_fmas_f32 v5, -|exec_lo|, -|exec_lo|, -|exec_lo| +// W32: encoding: [0x05,0x07,0x37,0xd6,0x7e,0xfc,0xf8,0xe1] -v_fmac_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] +v_div_fmas_f32 v5, -|exec_hi|, 0.5, -|v255| +// W32: encoding: [0x05,0x05,0x37,0xd6,0x7f,0xe0,0xfd,0xa7] -v_fmac_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] +v_div_fmas_f32 v5, null, exec_hi, -|exec_hi| +// W32: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfe,0xfc,0x81] -v_fmac_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] +v_div_fmas_f32 v5, -1, -|m0|, -|m0| +// W32: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xfa,0xf4,0xc1] -v_fmac_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_div_fmas_f32 v5, 0.5, -|vcc_lo|, 0.5 mul:2 +// W32: encoding: 
[0x05,0x02,0x37,0xd6,0xf0,0xd4,0xc0,0x4b] -v_fmac_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] +v_div_fmas_f32 v5, vcc_lo, v2, v3 +// W64: encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] -v_fmac_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] +v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi +// W64: encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] -v_fmac_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] +v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 +// W64: encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] -v_fmac_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] +v_div_fmas_f32 v5, m0, 0.5, v255 +// W64: encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] -v_fmac_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] +v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| +// W64: encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] -v_fmac_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] +v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| +// W64: encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] -v_fmac_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] +v_div_fmas_f32 v5, null, m0, -|m0| +// W64: encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] -v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] +v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo| +// W64: encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] -v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 +// W64: encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] -v_fmac_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] 
+v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_fmac_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00] +v_div_fmas_f32 v5, v255, src_scc, src_scc +// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03] -v_fmac_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00] +v_div_fmas_f32 v5, s105, s105, s105 +// GFX11: encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01] -v_fmac_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00] +v_div_fmas_f32 v5, src_scc, -1, -1 mul:4 +// GFX11: encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13] -v_fmac_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00] +v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2 +// GFX11: encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf] -v_fmac_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_fmac_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00] +v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4] +// GFX11: encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04] -v_fmac_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00] +v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105] +// GFX11: encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01] -v_fmac_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00] +v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]| +// GFX11: encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7] -v_fmac_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00] +v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, 
-|ttmp[14:15]|, -|ttmp[14:15]| +// GFX11: encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1] -v_fmac_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00] +v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null +// GFX11: encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61] -v_fmac_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00] +v_div_fmas_f64 v[5:6], null, 0.5, -src_scc +// GFX11: encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83] -v_fmac_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48] +v_div_fmas_f64 v[5:6], -1, -exec, |exec| +// GFX11: encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41] -v_fmac_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30] +v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2 +// GFX11: encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9] -v_fmac_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4 +// GFX11: encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33] -v_fmac_legacy_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] +v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf] -v_fmac_legacy_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] +v_div_scale_f32 v5, vcc_lo, v1, v2, s3 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, v255, s2, s105 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
-v_fmac_legacy_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] +v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] +v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] +v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] +v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456) +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] +v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] +v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fmac_legacy_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4 +// W32: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] +v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2 +// W32: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, v255 -// GFX11: encoding: 
[0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] +v_div_scale_f32 v5, vcc, v1, v2, s3 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, v255, s2, s105 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, s1, v255, exec_hi +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, s105, s105, exec_lo +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, 
m0, 0.5, m0 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] +v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456) +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] +v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] +v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4 +// W64: encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] +v_div_scale_f32 v255, 
vcc, neg(0xaf123456), -vcc_hi, null clamp div:2 +// W64: encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15] +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, m0 -// 
GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456 +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2 +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4 +// W32: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08] +v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2 +// W32: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10] +v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction -v_fract_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15] +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc +// W64: 
encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456 +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2 +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00] +v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4 +// W64: encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08] +v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 +// W64: encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction -v_fract_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30] +v_dot2_bf16_bf16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] -v_fract_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_dot2_bf16_bf16 v5, v255, v255, s105 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] -v_frexp_exp_i16_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00] +v_dot2_bf16_bf16 v5, s1, s2, v3 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] -v_frexp_exp_i16_f16_e64 v5, v255 -// GFX11: 
encoding: [0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00] +v_dot2_bf16_bf16 v5, s105, s105, m0 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] -v_frexp_exp_i16_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] -v_frexp_exp_i16_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -v_frexp_exp_i16_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] -v_frexp_exp_i16_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo +// GFX11: encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] -v_frexp_exp_i16_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| +// GFX11: encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] -v_frexp_exp_i16_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| +// GFX11: encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] -v_frexp_exp_i16_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| +// GFX11: encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] -v_frexp_exp_i16_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] -v_frexp_exp_i16_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00] +v_dot2_bf16_bf16 v5, -src_scc, 
|vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] -v_frexp_exp_i16_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00] +v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_frexp_exp_i16_f16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00] +v_dot2_f16_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] -v_frexp_exp_i16_f16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00] +v_dot2_f16_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] -v_frexp_exp_i16_f16_e64 v255, -|0xfe0b| -// GFX11: encoding: [0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +v_dot2_f16_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] -v_frexp_exp_i32_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00] +v_dot2_f16_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] -v_frexp_exp_i32_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00] +v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] -v_frexp_exp_i32_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00] +v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_frexp_exp_i32_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00] +v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] -v_frexp_exp_i32_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00] +v_dot2_f16_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] -v_frexp_exp_i32_f32_e64 
v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00] +v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] -v_frexp_exp_i32_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00] +v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] -v_frexp_exp_i32_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00] +v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| +// GFX11: encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_frexp_exp_i32_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00] +v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] -v_frexp_exp_i32_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00] +v_dot2_f16_f16 v5, 0.5, -m0, 0.5 op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] -v_frexp_exp_i32_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00] +v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] -v_frexp_exp_i32_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00] +v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] +// GFX11: encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_frexp_exp_i32_f32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] -v_frexp_exp_i32_f32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] -v_frexp_exp_i32_f32_e64 v255, -|0xaf123456| -// GFX11: encoding: 
[0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +v_fma_dx9_zero_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] -v_frexp_exp_i32_f64_e64 v5, v[1:2] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00] +v_fma_dx9_zero_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] -v_frexp_exp_i32_f64_e64 v5, v[254:255] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00] +v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] -v_frexp_exp_i32_f64_e64 v5, s[2:3] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_frexp_exp_i32_f64_e64 v5, s[104:105] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] -v_frexp_exp_i32_f64_e64 v5, vcc -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] -v_frexp_exp_i32_f64_e64 v5, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] -v_frexp_exp_i32_f64_e64 v5, exec -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] -v_frexp_exp_i32_f64_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_frexp_exp_i32_f64_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00] 
+v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] -v_frexp_exp_i32_f64_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00] +v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] -v_frexp_exp_i32_f64_e64 v5, -|src_scc| -// GFX11: encoding: [0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20] +v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] -v_frexp_exp_i32_f64_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_frexp_mant_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00] +v_fma_f16 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] -v_frexp_mant_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00] +v_fma_f16 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] -v_frexp_mant_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00] +v_fma_f16 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] -v_frexp_mant_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00] +v_fma_f16 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] -v_frexp_mant_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00] +v_fma_f16 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] -v_frexp_mant_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00] +v_fma_f16 v5, vcc_hi, 0xfe0b, v255 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_frexp_mant_f16_e64 v5, 
ttmp15 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00] +v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] -v_frexp_mant_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00] +v_fma_f16 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] -v_frexp_mant_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00] +v_fma_f16 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] -v_frexp_mant_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00] +v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: encoding: [0x05,0x7d,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] -v_frexp_mant_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00] +v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX11: encoding: [0x05,0x04,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_frexp_mant_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00] +v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] -v_frexp_mant_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08] +v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] -v_frexp_mant_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10] +v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] -v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_frexp_mant_f32_e64 v5, v1 -// GFX11: encoding: 
[0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] +v_fma_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] -v_frexp_mant_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00] +v_fma_f32 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01] -v_frexp_mant_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00] +v_fma_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01] -v_frexp_mant_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00] +v_fma_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01] -v_frexp_mant_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00] +v_fma_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04] -v_frexp_mant_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00] +v_fma_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_frexp_mant_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00] +v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1] -v_frexp_mant_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00] +v_fma_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01] -v_frexp_mant_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00] +v_fma_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01] -v_frexp_mant_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00] +v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1] -v_frexp_mant_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00] 
+v_fma_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_frexp_mant_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00] +v_fma_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3] -v_frexp_mant_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08] +v_fma_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b] -v_frexp_mant_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10] +v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33] -v_frexp_mant_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -v_frexp_mant_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00] +v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] -v_frexp_mant_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00] +v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7] +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00] -v_frexp_mant_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00] +v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255] +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07] -v_frexp_mant_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00] +v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| +// GFX11: encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1] -v_frexp_mant_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00] +v_fma_f64 v[5:6], vcc, 
-|ttmp[14:15]|, -|ttmp[14:15]| +// GFX11: encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1] -v_frexp_mant_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00] +v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null +// GFX11: encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -v_frexp_mant_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00] +v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec| +// GFX11: encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1] -v_frexp_mant_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00] +v_fma_f64 v[5:6], null, 0.5, vcc +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01] -v_frexp_mant_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00] +v_fma_f64 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_frexp_mant_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08] +v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2 +// GFX11: encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b] -v_frexp_mant_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30] +v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 +// GFX11: encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73] -v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 +// GFX11: encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -v_ldexp_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] +v_fma_legacy_f32 v5, v1, v2, s3 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] -v_ldexp_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00] +v_fma_legacy_f32 v5, v255, s2, s105 +// 
GFX11: encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] -v_ldexp_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00] +v_fma_legacy_f32 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] -v_ldexp_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00] +v_fma_legacy_f32 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] -v_ldexp_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00] +v_fma_legacy_f32 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] -v_ldexp_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +v_fma_legacy_f32 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_ldexp_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] +v_fma_legacy_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] -v_ldexp_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] +v_fma_legacy_f32 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] -v_ldexp_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] +v_fma_legacy_f32 v5, |exec_lo|, -1, vcc_hi +// GFX11: encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] -v_ldexp_f16_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00] +v_fma_legacy_f32 v5, -|exec_hi|, null, -|vcc_lo| +// GFX11: encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] -v_ldexp_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00] +v_fma_legacy_f32 v5, null, exec_lo, -|0xaf123456| +// GFX11: encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -v_ldexp_f16_e64 v5, -1, exec_hi -// 
GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00] +v_fma_legacy_f32 v5, -1, -|exec_hi|, -|src_scc| +// GFX11: encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] -v_ldexp_f16_e64 v5, 0.5, m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08] +v_fma_legacy_f32 v5, 0.5, -m0, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] -v_ldexp_f16_e64 v5, src_scc, vcc_lo mul:4 -// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10] +v_fma_legacy_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] -v_ldexp_f16_e64 v255, -|0xfe0b|, vcc_hi clamp div:2 -// GFX11: encoding: [0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_fma_legacy_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] v_ldexp_f32 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] @@ -5666,96 +2509,6 @@ v_lerp_u8 v5, src_scc, vcc_lo, -1 v_lerp_u8 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_log_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00] - -v_log_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00] - -v_log_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00] - -v_log_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00] - -v_log_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00] - -v_log_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00] - -v_log_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00] - -v_log_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00] - -v_log_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00] - -v_log_f16_e64 v5, exec_hi -// GFX11: encoding: 
[0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00] - -v_log_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00] - -v_log_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00] - -v_log_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08] - -v_log_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10] - -v_log_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_log_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00] - -v_log_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00] - -v_log_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00] - -v_log_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00] - -v_log_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00] - -v_log_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00] - -v_log_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00] - -v_log_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00] - -v_log_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00] - -v_log_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00] - -v_log_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00] - -v_log_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00] - -v_log_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08] - -v_log_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10] - -v_log_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - v_lshl_add_u32 v5, v1, v2, s3 // GFX11: encoding: 
[0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00] @@ -5891,51 +2644,6 @@ v_lshlrev_b16 v5, src_scc, vcc_lo v_lshlrev_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_lshlrev_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] - -v_lshlrev_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00] - -v_lshlrev_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00] - -v_lshlrev_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00] - -v_lshlrev_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00] - -v_lshlrev_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_lshlrev_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00] - -v_lshlrev_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00] - -v_lshlrev_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00] - -v_lshlrev_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00] - -v_lshlrev_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00] - -v_lshlrev_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00] - -v_lshlrev_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00] - -v_lshlrev_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00] - -v_lshlrev_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_lshlrev_b64 v[5:6], v1, vcc // GFX11: encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] @@ -6008,51 +2716,6 @@ v_lshrrev_b16 v5, src_scc, vcc_lo v_lshrrev_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: 
[0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_lshrrev_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00] - -v_lshrrev_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00] - -v_lshrrev_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00] - -v_lshrrev_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00] - -v_lshrrev_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00] - -v_lshrrev_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_lshrrev_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00] - -v_lshrrev_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00] - -v_lshrrev_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00] - -v_lshrrev_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00] - -v_lshrrev_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00] - -v_lshrrev_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00] - -v_lshrrev_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00] - -v_lshrrev_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00] - -v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_lshrrev_b64 v[5:6], v1, vcc // GFX11: encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] @@ -6770,96 +3433,6 @@ v_max3_u32 v5, src_scc, vcc_lo, -1 v_max3_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_max_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] - -v_max_f16_e64 v5, 
v255, v255 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] - -v_max_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] - -v_max_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] - -v_max_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] - -v_max_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_max_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] - -v_max_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] - -v_max_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] - -v_max_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] - -v_max_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] - -v_max_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] - -v_max_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] - -v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] - -v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_max_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] - -v_max_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00] - -v_max_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00] - -v_max_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00] - -v_max_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00] - -v_max_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: 
[0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_max_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00] - -v_max_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00] - -v_max_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00] - -v_max_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00] - -v_max_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00] - -v_max_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00] - -v_max_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48] - -v_max_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30] - -v_max_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_max_f64 v[5:6], v[1:2], v[2:3] // GFX11: encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] @@ -6941,51 +3514,6 @@ v_max_i16 v5, src_scc, vcc_lo v_max_i16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_max_i32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] - -v_max_i32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00] - -v_max_i32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00] - -v_max_i32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00] - -v_max_i32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00] - -v_max_i32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_max_i32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00] - -v_max_i32_e64 v5, m0, 0.5 -// GFX11: encoding: 
[0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00] - -v_max_i32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00] - -v_max_i32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00] - -v_max_i32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00] - -v_max_i32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00] - -v_max_i32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00] - -v_max_i32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00] - -v_max_i32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_max_u16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] @@ -7023,58 +3551,13 @@ v_max_u16 v5, -1, exec_hi // GFX11: encoding: [0x05,0x00,0x09,0xd7,0xc1,0xfe,0x00,0x00] v_max_u16 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] - -v_max_u16 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00] - -v_max_u16 v255, 0xfe0b, vcc_hi -// GFX11: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_max_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] - -v_max_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00] - -v_max_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00] - -v_max_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00] - -v_max_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00] - -v_max_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_max_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00] - -v_max_u32_e64 v5, m0, 0.5 -// GFX11: 
encoding: [0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00] - -v_max_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00] - -v_max_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00] - -v_max_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00] - -v_max_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00] - -v_max_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00] +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xff,0xfa,0x00,0x00,0x00,0x38,0x00,0x00] -v_max_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00] +v_max_u16 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd7,0xfd,0xd4,0x00,0x00] -v_max_u32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +v_max_u16 v255, 0xfe0b, vcc_hi +// GFX11: encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] v_maxmin_f16 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] @@ -7886,96 +4369,6 @@ v_min3_u32 v5, src_scc, vcc_lo, -1 v_min3_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_min_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] - -v_min_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] - -v_min_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] - -v_min_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] - -v_min_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] - -v_min_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_min_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: 
[0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] - -v_min_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] - -v_min_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] - -v_min_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] - -v_min_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] - -v_min_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] - -v_min_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] - -v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] - -v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_min_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00] - -v_min_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00] - -v_min_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00] - -v_min_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00] - -v_min_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00] - -v_min_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_min_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00] - -v_min_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00] - -v_min_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00] - -v_min_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00] - -v_min_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00] - -v_min_f32_e64 v5, -1, exec_hi -// GFX11: 
encoding: [0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00] - -v_min_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48] - -v_min_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30] - -v_min_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_min_f64 v[5:6], v[1:2], v[2:3] // GFX11: encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] @@ -8057,51 +4450,6 @@ v_min_i16 v5, src_scc, vcc_lo v_min_i16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min_i32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] - -v_min_i32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00] - -v_min_i32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00] - -v_min_i32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00] - -v_min_i32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00] - -v_min_i32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_min_i32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00] - -v_min_i32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00] - -v_min_i32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00] - -v_min_i32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00] - -v_min_i32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00] - -v_min_i32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00] - -v_min_i32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00] - -v_min_i32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: 
[0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00] - -v_min_i32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_min_u16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] @@ -8147,51 +4495,6 @@ v_min_u16 v5, src_scc, vcc_lo v_min_u16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_min_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] - -v_min_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00] - -v_min_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00] - -v_min_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00] - -v_min_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00] - -v_min_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_min_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00] - -v_min_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00] - -v_min_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00] - -v_min_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00] - -v_min_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00] - -v_min_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00] - -v_min_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00] - -v_min_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00] - -v_min_u32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_minmax_f16 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] 
@@ -8372,114 +4675,6 @@ v_minmax_u32 v5, src_scc, vcc_lo, -1 v_minmax_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_mov_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00] - -v_mov_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00] - -v_mov_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00] - -v_mov_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00] - -v_mov_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00] - -v_mov_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00] - -v_mov_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00] - -v_mov_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00] - -v_mov_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00] - -v_mov_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00] - -v_mov_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00] - -v_mov_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00] - -v_mov_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00] - -v_mov_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00] - -v_mov_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_movreld_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00] - -v_movreld_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] - -v_movreld_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, vcc_lo -// GFX11: encoding: 
[0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00] - -v_movreld_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00] - -v_movreld_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_movrels_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00] - -v_movrels_b32_e64 v255, v255 -// GFX11: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] - -v_movrelsd_2_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00] - -v_movrelsd_2_b32_e64 v255, v255 -// GFX11: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] - -v_movrelsd_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00] - -v_movrelsd_b32_e64 v255, v255 -// GFX11: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] - v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] // GFX11: encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] @@ -8615,141 +4810,6 @@ v_msad_u8 v5, src_scc, vcc_lo, -1 v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp // GFX11: encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_mul_dx9_zero_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] - -v_mul_dx9_zero_f32_e64 
v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] - -v_mul_dx9_zero_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_dx9_zero_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_dx9_zero_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_dx9_zero_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_dx9_zero_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] - -v_mul_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] - -v_mul_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - -v_mul_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] - -v_mul_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00] - -v_mul_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00] - -v_mul_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: 
[0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_mul_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48] - -v_mul_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30] - -v_mul_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_mul_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00] - -v_mul_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00] - -v_mul_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00] - -v_mul_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_f32_e64 v5, 
|exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48] - -v_mul_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30] - -v_mul_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_mul_f64 v[5:6], v[1:2], v[2:3] // GFX11: encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] @@ -8831,51 +4891,6 @@ v_mul_hi_i32 v5, src_scc, vcc_lo v_mul_hi_i32 v255, 0xaf123456, vcc_hi // GFX11: encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_mul_hi_i32_i24_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00] - -v_mul_hi_i32_i24_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00] - -v_mul_hi_i32_i24_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_hi_i32_i24_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_hi_i32_i24_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_hi_i32_i24_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_hi_i32_i24_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, null, exec_lo -// GFX11: 
encoding: [0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00] - -v_mul_hi_i32_i24_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00] - -v_mul_hi_i32_i24_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_mul_hi_u32 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] @@ -8921,141 +4936,6 @@ v_mul_hi_u32 v5, src_scc, vcc_lo v_mul_hi_u32 v255, 0xaf123456, vcc_hi // GFX11: encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_mul_hi_u32_u24_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00] - -v_mul_hi_u32_u24_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00] - -v_mul_hi_u32_u24_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_hi_u32_u24_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_hi_u32_u24_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_hi_u32_u24_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_hi_u32_u24_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, -1, exec_hi -// GFX11: encoding: 
[0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00] - -v_mul_hi_u32_u24_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00] - -v_mul_hi_u32_u24_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_mul_i32_i24_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00] - -v_mul_i32_i24_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00] - -v_mul_i32_i24_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00] - -v_mul_i32_i24_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_i32_i24_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_i32_i24_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_i32_i24_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_i32_i24_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_i32_i24_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_i32_i24_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_i32_i24_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_i32_i24_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_i32_i24_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00] - -v_mul_i32_i24_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00] - -v_mul_i32_i24_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_mul_legacy_f32_e64 v5, v1, v2 -// GFX11: encoding: 
[0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] - -v_mul_legacy_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] - -v_mul_legacy_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] - -v_mul_legacy_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_legacy_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_legacy_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_legacy_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_legacy_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_legacy_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_legacy_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_legacy_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_legacy_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_legacy_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] - -v_mul_legacy_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] - -v_mul_legacy_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_mul_lo_u16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] @@ -9146,51 +5026,6 @@ v_mul_lo_u32 v5, src_scc, vcc_lo v_mul_lo_u32 v255, 0xaf123456, vcc_hi // GFX11: encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_mul_u32_u24_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00] - -v_mul_u32_u24_e64 v5, v255, v255 -// GFX11: encoding: 
[0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00] - -v_mul_u32_u24_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00] - -v_mul_u32_u24_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00] - -v_mul_u32_u24_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00] - -v_mul_u32_u24_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_mul_u32_u24_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00] - -v_mul_u32_u24_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00] - -v_mul_u32_u24_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00] - -v_mul_u32_u24_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00] - -v_mul_u32_u24_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00] - -v_mul_u32_u24_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00] - -v_mul_u32_u24_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00] - -v_mul_u32_u24_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00] - -v_mul_u32_u24_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_mullit_f32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] @@ -9230,104 +5065,11 @@ v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 // GFX11: encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] -v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] - -v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 -// GFX11: encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] - -v_nop_e64 -// GFX11: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00] 
- -v_not_b16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00] - -v_not_b16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00] - -v_not_b16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00] - -v_not_b16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00] - -v_not_b16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00] - -v_not_b16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00] - -v_not_b16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00] - -v_not_b16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00] - -v_not_b16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00] - -v_not_b16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00] - -v_not_b16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00] - -v_not_b16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00] - -v_not_b16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] - -v_not_b16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00] - -v_not_b16_e64 v255, 0xfe0b -// GFX11: encoding: [0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_not_b32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00] - -v_not_b32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00] - -v_not_b32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00] - -v_not_b32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00] - -v_not_b32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00] - -v_not_b32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00] - -v_not_b32_e64 v5, ttmp15 -// GFX11: encoding: 
[0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00] - -v_not_b32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00] - -v_not_b32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00] - -v_not_b32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00] - -v_not_b32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00] - -v_not_b32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00] - -v_not_b32_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00] - -v_not_b32_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00] +v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] -v_not_b32_e64 v255, 0xaf123456 -// GFX11: encoding: [0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 +// GFX11: encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] v_or3_b32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] @@ -9419,51 +5161,6 @@ v_or_b16 v5, src_scc, vcc_lo v_or_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_or_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00] - -v_or_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00] - -v_or_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00] - -v_or_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00] - -v_or_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00] - -v_or_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_or_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00] - -v_or_b32_e64 v5, m0, 
0.5 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00] - -v_or_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00] - -v_or_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00] - -v_or_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00] - -v_or_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00] - -v_or_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00] - -v_or_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00] - -v_or_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_pack_b32_f16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] @@ -9540,583 +5237,175 @@ v_perm_b32 v5, exec_hi, null, vcc_lo // GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7f,0xf8,0xa8,0x01] v_perm_b32 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_perm_b32 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] - -v_perm_b32 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] - -v_perm_b32 v5, src_scc, vcc_lo, -1 -// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] - -v_perm_b32 v255, 0xaf123456, vcc_hi, null -// GFX11: encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_permlane16_b32 v5, v1, s2, s3 -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] - -v_permlane16_b32 v5, v1, s105, s105 -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] - -v_permlane16_b32 v5, v1, ttmp15, ttmp15 -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] - -v_permlane16_b32 v5, v1, vcc_hi, exec_lo -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xf8,0x01] - -v_permlane16_b32 v5, v1, vcc_lo, m0 -// GFX11: 
encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf4,0x01] - -v_permlane16_b32 v5, v1, m0, vcc_hi -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xac,0x01] - -v_permlane16_b32 v5, v1, exec_hi, vcc_lo -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xa8,0x01] - -v_permlane16_b32 v5, v1, exec_lo, src_scc -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xf4,0x03] - -v_permlane16_b32 v5, v1, null, 0.5 op_sel:[1,1] -// GFX11: encoding: [0x05,0x18,0x5b,0xd6,0x01,0xf9,0xc0,0x03] - -v_permlane16_b32 v5, v1, -1, -1 op_sel:[0,0] -// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x05,0x03] - -v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] -// GFX11: encoding: [0x05,0x08,0x5b,0xd6,0x01,0xe1,0xf1,0x01] - -v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] -// GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] - -v_permlanex16_b32 v5, v1, s2, s3 -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] - -v_permlanex16_b32 v5, v1, s105, s105 -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] - -v_permlanex16_b32 v5, v1, ttmp15, ttmp15 -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] - -v_permlanex16_b32 v5, v1, vcc_hi, exec_lo -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xf8,0x01] - -v_permlanex16_b32 v5, v1, vcc_lo, m0 -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf4,0x01] - -v_permlanex16_b32 v5, v1, m0, vcc_hi -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xac,0x01] - -v_permlanex16_b32 v5, v1, exec_hi, vcc_lo -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xa8,0x01] - -v_permlanex16_b32 v5, v1, exec_lo, src_scc -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xf4,0x03] - -v_permlanex16_b32 v5, v1, null, 0.5 op_sel:[1,1] -// GFX11: encoding: [0x05,0x18,0x5c,0xd6,0x01,0xf9,0xc0,0x03] - -v_permlanex16_b32 v5, v1, -1, -1 op_sel:[0,0] -// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x05,0x03] - -v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] -// GFX11: encoding: 
[0x05,0x08,0x5c,0xd6,0x01,0xe1,0xf1,0x01] - -v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] -// GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] - -v_pipeflush_e64 -// GFX11: encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00] - -v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] - -v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] - -v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] - -v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] - -v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] - -v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] - -v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] - -v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] - -v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] - -v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] - -v_qsad_pk_u16_u8 v[5:6], null, null, vcc -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] - -v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] - -v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] - -v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 -// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] - -v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp -// GFX11: encoding: 
[0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] - -v_rcp_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00] - -v_rcp_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00] - -v_rcp_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00] - -v_rcp_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08] - -v_rcp_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10] - -v_rcp_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_rcp_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00] - -v_rcp_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00] - -v_rcp_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00] - 
-v_rcp_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00] - -v_rcp_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08] - -v_rcp_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10] - -v_rcp_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_rcp_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00] - -v_rcp_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00] - -v_rcp_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00] - -v_rcp_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00] - -v_rcp_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00] - -v_rcp_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00] - -v_rcp_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00] - -v_rcp_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00] - -v_rcp_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00] - -v_rcp_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08] - -v_rcp_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30] - -v_rcp_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: 
[0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_rcp_iflag_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08] - -v_rcp_iflag_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10] - -v_rcp_iflag_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_readlane_b32 s5, v1, s2 -// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] - -v_readlane_b32 s5, v1, s105 -// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] - -v_readlane_b32 s105, v1, ttmp15 -// GFX11: encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] - -v_readlane_b32 vcc_lo, v1, vcc_hi -// GFX11: encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] - -v_readlane_b32 vcc_hi, v1, vcc_lo -// GFX11: encoding: 
[0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] - -v_readlane_b32 ttmp15, v1, m0 -// GFX11: encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] - -v_readlane_b32 null, v255, null -// GFX11: encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] - -v_rndne_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00] - -v_rndne_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00] - -v_rndne_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00] - -v_rndne_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08] - -v_rndne_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10] - -v_rndne_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_rndne_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00] - -v_rndne_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00] - -v_rndne_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, s105 -// GFX11: encoding: 
[0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00] - -v_rndne_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08] +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_rndne_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10] +v_perm_b32 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xc1,0xfe,0xf4,0x03] -v_rndne_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_perm_b32 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xf0,0xfa,0xc0,0x03] -v_rndne_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00] +v_perm_b32 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x44,0xd6,0xfd,0xd4,0x04,0x03] -v_rndne_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00] +v_perm_b32 v255, 0xaf123456, vcc_hi, null +// GFX11: encoding: [0xff,0x00,0x44,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_rndne_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, s2, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0x0c,0x00] 
-v_rndne_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd3,0xa4,0x01] -v_rndne_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, ttmp15, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xf7,0xec,0x01] -v_rndne_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, vcc_hi, exec_lo +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd7,0xf8,0x01] -v_rndne_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, vcc_lo, m0 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xd5,0xf4,0x01] -v_rndne_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, m0, vcc_hi +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0xac,0x01] -v_rndne_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, exec_hi, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xa8,0x01] -v_rndne_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08] +v_permlane16_b32 v5, v1, exec_lo, src_scc +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfd,0xf4,0x03] -v_rndne_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30] +v_permlane16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX11: encoding: [0x05,0x18,0x5b,0xd6,0x01,0xf9,0xc0,0x03] -v_rndne_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_permlane16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x83,0x05,0x03] -v_rsq_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00] +v_permlane16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX11: 
encoding: [0x05,0x08,0x5b,0xd6,0x01,0xe1,0xf1,0x01] -v_rsq_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00] +v_permlane16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX11: encoding: [0xff,0x10,0x5b,0xd6,0xff,0xfb,0xfd,0x01] -v_rsq_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -v_rsq_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_rsq_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00] +v_permlane16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5b,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] -v_rsq_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, s2, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] -v_rsq_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, s105, s105 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] -v_rsq_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, ttmp15, ttmp15 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] -v_rsq_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, vcc_hi, exec_lo +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xf8,0x01] -v_rsq_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, vcc_lo, m0 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf4,0x01] -v_rsq_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, m0, vcc_hi +// GFX11: 
encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xac,0x01] -v_rsq_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, exec_hi, vcc_lo +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xa8,0x01] -v_rsq_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08] +v_permlanex16_b32 v5, v1, exec_lo, src_scc +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xf4,0x03] -v_rsq_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10] +v_permlanex16_b32 v5, v1, null, 0.5 op_sel:[1,1] +// GFX11: encoding: [0x05,0x18,0x5c,0xd6,0x01,0xf9,0xc0,0x03] -v_rsq_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +v_permlanex16_b32 v5, v1, -1, -1 op_sel:[0,0] +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0x05,0x03] -v_rsq_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00] +v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0] +// GFX11: encoding: [0x05,0x08,0x5c,0xd6,0x01,0xe1,0xf1,0x01] -v_rsq_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00] +v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1] +// GFX11: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01] -v_rsq_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, 0xaf123456, s3 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -v_rsq_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, s2, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_rsq_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00] +v_permlanex16_b32 v5, v1, 0x12345678, 0x12345678 +// GFX11: encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xfd,0x03,0x78,0x56,0x34,0x12] -v_rsq_f32_e64 v5, vcc_hi -// GFX11: encoding: 
[0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] -v_rsq_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] -v_rsq_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] -v_rsq_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] -v_rsq_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] -v_rsq_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] -v_rsq_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] -v_rsq_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08] +v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] -v_rsq_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10] +v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] -v_rsq_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec +// GFX11: encoding: 
[0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] -v_rsq_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], null, null, vcc +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] -v_rsq_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -v_rsq_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] -v_rsq_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 +// GFX11: encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] -v_rsq_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00] +v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp +// GFX11: encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] -v_rsq_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00] +v_readlane_b32 s5, v1, s2 +// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] -v_rsq_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00] +v_readlane_b32 s5, v1, s105 +// GFX11: encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] -v_rsq_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00] +v_readlane_b32 s105, v1, ttmp15 +// GFX11: encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] -v_rsq_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00] +v_readlane_b32 vcc_lo, v1, vcc_hi +// GFX11: encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] -v_rsq_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08] +v_readlane_b32 vcc_hi, v1, vcc_lo +// GFX11: 
encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] -v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30] +v_readlane_b32 ttmp15, v1, m0 +// GFX11: encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] -v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +v_readlane_b32 null, v255, null +// GFX11: encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] v_sad_hi_u8 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00] @@ -10256,415 +5545,47 @@ v_sad_u32 v255, 0xaf123456, vcc_hi, null clamp v_sad_u8 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00] -v_sad_u8 v5, v255, s2, s105 -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01] - -v_sad_u8 v5, s1, v255, exec_hi -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01] - -v_sad_u8 v5, s105, s105, exec_lo -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01] - -v_sad_u8 v5, vcc_lo, ttmp15, v3 -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04] - -v_sad_u8 v5, vcc_hi, 0xaf123456, v255 -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - -v_sad_u8 v5, ttmp15, src_scc, ttmp15 -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01] - -v_sad_u8 v5, m0, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01] - -v_sad_u8 v5, exec_lo, -1, vcc_hi -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01] - -v_sad_u8 v5, exec_hi, null, vcc_lo -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01] - -v_sad_u8 v5, null, exec_lo, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - -v_sad_u8 v5, -1, exec_hi, src_scc -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03] - -v_sad_u8 v5, 0.5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03] - -v_sad_u8 v5, src_scc, vcc_lo, -1 -// GFX11: 
encoding: [0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03] - -v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - -v_sat_pk_u8_i16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, 0.5 -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v5, src_scc -// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] - -v_sat_pk_u8_i16_e64 v255, 0xfe0b -// GFX11: encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_sin_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] - -v_sin_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00] - -v_sin_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00] - -v_sin_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00] - 
-v_sin_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00] - -v_sin_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00] - -v_sin_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00] - -v_sin_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00] - -v_sin_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00] - -v_sin_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00] - -v_sin_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00] - -v_sin_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00] - -v_sin_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08] - -v_sin_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10] - -v_sin_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_sin_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] - -v_sin_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00] - -v_sin_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00] - -v_sin_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00] - -v_sin_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00] - -v_sin_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00] - -v_sin_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00] - -v_sin_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00] - -v_sin_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00] - -v_sin_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00] - -v_sin_f32_e64 v5, null -// GFX11: encoding: 
[0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00] - -v_sin_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00] - -v_sin_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] - -v_sin_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10] - -v_sin_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_sqrt_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00] - -v_sqrt_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00] - -v_sqrt_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00] - -v_sqrt_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08] - -v_sqrt_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10] - -v_sqrt_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_sqrt_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00] - -v_sqrt_f32_e64 v5, v255 -// GFX11: encoding: 
[0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00] - -v_sqrt_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00] - -v_sqrt_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08] - -v_sqrt_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10] - -v_sqrt_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_sqrt_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], null -// GFX11: 
encoding: [0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00] - -v_sqrt_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08] - -v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30] - -v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - -v_sub_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, v255, src_scc, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0xff,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, s105, s105, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x6a,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x6b,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, m0, 0.5, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x7d,0xe0,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_sub_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 -// W32: encoding: [0x05,0x06,0x21,0xd5,0x7f,0x82,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s105, null, exec_hi, s105 -// W32: encoding: [0x05,0x69,0x21,0xd5,0x7c,0xfe,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo -// W32: encoding: [0x05,0x6a,0x21,0xd5,0xc1,0xfa,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi -// W32: encoding: [0x05,0x6b,0x21,0xd5,0xf0,0xd4,0xac,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 -// W32: encoding: [0x05,0x7b,0x21,0xd5,0xfd,0xf8,0xec,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, v255, s2, s105 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01] -v_sub_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, s1, v255, exec_hi +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01] -v_sub_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, s105, s105, exec_lo +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01] -v_sub_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction +v_sad_u8 v5, vcc_lo, ttmp15, v3 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04] -v_sub_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, vcc_hi, 0xaf123456, v255 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -v_sub_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, ttmp15, src_scc, ttmp15 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01] -v_sub_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, m0, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01] -v_sub_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, exec_lo, -1, vcc_hi +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01] -v_sub_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, exec_hi, null, vcc_lo +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01] -v_sub_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] -// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, null, exec_lo, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -v_sub_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] -// W64: encoding: 
[0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, -1, exec_hi, src_scc +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03] -v_sub_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc -// W64: encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, 0.5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03] -v_sub_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] -// W64: encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8 v5, src_scc, vcc_lo, -1 +// GFX11: encoding: [0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03] -v_sub_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] v_sub_co_u32 v5, s6, v1, v2 // W32: encoding: [0x05,0x06,0x01,0xd7,0x01,0x05,0x02,0x00] @@ -10781,96 +5702,6 @@ v_sub_co_u32 v5, ttmp[14:15], src_scc, vcc_lo v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_sub_f16_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] - -v_sub_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00] - -v_sub_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00] - -v_sub_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00] - -v_sub_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00] - -v_sub_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_sub_f16_e64 v5, ttmp15, src_scc -// GFX11: 
encoding: [0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00] - -v_sub_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00] - -v_sub_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00] - -v_sub_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00] - -v_sub_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00] - -v_sub_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00] - -v_sub_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48] - -v_sub_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30] - -v_sub_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_sub_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00] - -v_sub_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00] - -v_sub_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00] - -v_sub_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00] - -v_sub_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00] - -v_sub_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_sub_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00] - -v_sub_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00] - -v_sub_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00] - -v_sub_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00] - -v_sub_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00] - -v_sub_f32_e64 v5, -1, exec_hi -// 
GFX11: encoding: [0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00] - -v_sub_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48] - -v_sub_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30] - -v_sub_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - v_sub_nc_i16 v5, v1, v2 // GFX11: encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] @@ -11006,158 +5837,6 @@ v_sub_nc_u16 v5, src_scc, vcc_lo op_sel:[0,1,0] v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp // GFX11: encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_sub_nc_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00] - -v_sub_nc_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00] - -v_sub_nc_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00] - -v_sub_nc_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00] - -v_sub_nc_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00] - -v_sub_nc_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_sub_nc_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00] - -v_sub_nc_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00] - -v_sub_nc_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00] - -v_sub_nc_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00] - -v_sub_nc_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00] - -v_sub_nc_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00] - -v_sub_nc_u32_e64 v5, 0.5, m0 -// GFX11: encoding: 
[0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00] - -v_sub_nc_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00] - -v_sub_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - -v_subrev_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, v255, src_scc, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0xff,0xfb,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, s105, s105, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x69,0xd2,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x6a,0x04,0x0e,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x6b,0xfe,0x0f,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x7b,0xf6,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, m0, 0.5, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x7d,0xe0,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x7e,0xfc,0x0c,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 -// W32: encoding: [0x05,0x06,0x22,0xd5,0x7f,0x82,0x0d,0x00] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
- -v_subrev_co_ci_u32_e64 v5, s105, null, exec_hi, s105 -// W32: encoding: [0x05,0x69,0x22,0xd5,0x7c,0xfe,0xa4,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo -// W32: encoding: [0x05,0x6a,0x22,0xd5,0xc1,0xfa,0xa8,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi -// W32: encoding: [0x05,0x6b,0x22,0xd5,0xf0,0xd4,0xac,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 -// W32: encoding: [0x05,0x7b,0x22,0xd5,0xfd,0xf8,0xec,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_subrev_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] -// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] -// W64: encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc -// W64: encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] -// W64: encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp -// GFX11: encoding: [0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] - v_subrev_co_u32 v5, s6, v1, v2 // W32: encoding: [0x05,0x06,0x02,0xd7,0x01,0x05,0x02,0x00] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -11273,141 +5952,6 @@ v_subrev_co_u32 v5, ttmp[14:15], src_scc, vcc_lo v_subrev_co_u32 v255, null, 0xaf123456, vcc_hi clamp // GFX11: encoding: [0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_subrev_f16_e64 v5, v1, v2 -// GFX11: encoding: 
[0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] - -v_subrev_f16_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00] - -v_subrev_f16_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00] - -v_subrev_f16_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00] - -v_subrev_f16_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00] - -v_subrev_f16_e64 v5, vcc_hi, 0xfe0b -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] - -v_subrev_f16_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00] - -v_subrev_f16_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00] - -v_subrev_f16_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00] - -v_subrev_f16_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00] - -v_subrev_f16_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00] - -v_subrev_f16_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00] - -v_subrev_f16_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48] - -v_subrev_f16_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30] - -v_subrev_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] - -v_subrev_f32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00] - -v_subrev_f32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00] - -v_subrev_f32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00] - -v_subrev_f32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00] - -v_subrev_f32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00] - 
-v_subrev_f32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_subrev_f32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00] - -v_subrev_f32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00] - -v_subrev_f32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00] - -v_subrev_f32_e64 v5, |exec_hi|, null -// GFX11: encoding: [0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00] - -v_subrev_f32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00] - -v_subrev_f32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00] - -v_subrev_f32_e64 v5, 0.5, -m0 mul:2 -// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48] - -v_subrev_f32_e64 v5, -src_scc, |vcc_lo| mul:4 -// GFX11: encoding: [0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30] - -v_subrev_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 -// GFX11: encoding: [0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] - -v_subrev_nc_u32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00] - -v_subrev_nc_u32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00] - -v_subrev_nc_u32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00] - -v_subrev_nc_u32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00] - -v_subrev_nc_u32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00] - -v_subrev_nc_u32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_subrev_nc_u32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00] - -v_subrev_nc_u32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00] - -v_subrev_nc_u32_e64 v5, exec_lo, -1 -// GFX11: encoding: 
[0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00] - -v_subrev_nc_u32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00] - -v_subrev_nc_u32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00] - -v_subrev_nc_u32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00] - -v_subrev_nc_u32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00] - -v_subrev_nc_u32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00] - -v_subrev_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp -// GFX11: encoding: [0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_trig_preop_f64 v[5:6], v[1:2], v2 // GFX11: encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] @@ -11453,132 +5997,6 @@ v_trig_preop_f64 v[5:6], -|src_scc|, src_scc mul:4 v_trig_preop_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 // GFX11: encoding: [0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] -v_trunc_f16_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00] - -v_trunc_f16_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00] - -v_trunc_f16_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, null -// GFX11: encoding: 
[0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00] - -v_trunc_f16_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08] - -v_trunc_f16_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10] - -v_trunc_f16_e64 v255, -|0xfe0b| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] - -v_trunc_f32_e64 v5, v1 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00] - -v_trunc_f32_e64 v5, v255 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00] - -v_trunc_f32_e64 v5, s1 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, s105 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, vcc_lo -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, vcc_hi -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, ttmp15 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, m0 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, exec_lo -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, exec_hi -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, null -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, -1 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00] - -v_trunc_f32_e64 v5, 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08] - -v_trunc_f32_e64 v5, src_scc mul:4 -// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10] - -v_trunc_f32_e64 v255, -|0xaf123456| clamp div:2 -// GFX11: encoding: [0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] - -v_trunc_f64_e64 v[5:6], v[1:2] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00] - -v_trunc_f64_e64 
v[5:6], v[254:255] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00] - -v_trunc_f64_e64 v[5:6], s[2:3] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], s[104:105] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], vcc -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], ttmp[14:15] -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], exec -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], null -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], -1 -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00] - -v_trunc_f64_e64 v[5:6], 0.5 mul:2 -// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08] - -v_trunc_f64_e64 v[5:6], -|src_scc| mul:4 -// GFX11: encoding: [0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30] - -v_trunc_f64_e64 v[254:255], 0xaf123456 clamp div:2 -// GFX11: encoding: [0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] - v_writelane_b32 v5, s1, s2 // GFX11: encoding: [0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00] @@ -11663,51 +6081,6 @@ v_xad_u32 v5, src_scc, vcc_lo, -1 v_xad_u32 v255, 0xaf123456, vcc_hi, null // GFX11: encoding: [0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -v_xnor_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00] - -v_xnor_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00] - -v_xnor_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00] - -v_xnor_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00] - -v_xnor_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00] - -v_xnor_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_xnor_b32_e64 v5, 
ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00] - -v_xnor_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00] - -v_xnor_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00] - -v_xnor_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00] - -v_xnor_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00] - -v_xnor_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00] - -v_xnor_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00] - -v_xnor_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00] - -v_xnor_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] - v_xor3_b32 v5, v1, v2, s3 // GFX11: encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] @@ -11797,48 +6170,3 @@ v_xor_b16 v5, src_scc, vcc_lo v_xor_b16 v255, 0xfe0b, vcc_hi // GFX11: encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] - -v_xor_b32_e64 v5, v1, v2 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00] - -v_xor_b32_e64 v5, v255, v255 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00] - -v_xor_b32_e64 v5, s1, s2 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00] - -v_xor_b32_e64 v5, s105, s105 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00] - -v_xor_b32_e64 v5, vcc_lo, ttmp15 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00] - -v_xor_b32_e64 v5, vcc_hi, 0xaf123456 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] - -v_xor_b32_e64 v5, ttmp15, src_scc -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00] - -v_xor_b32_e64 v5, m0, 0.5 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00] - -v_xor_b32_e64 v5, exec_lo, -1 -// GFX11: encoding: 
[0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00] - -v_xor_b32_e64 v5, exec_hi, null -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00] - -v_xor_b32_e64 v5, null, exec_lo -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00] - -v_xor_b32_e64 v5, -1, exec_hi -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00] - -v_xor_b32_e64 v5, 0.5, m0 -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00] - -v_xor_b32_e64 v5, src_scc, vcc_lo -// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00] - -v_xor_b32_e64 v255, 0xaf123456, vcc_hi -// GFX11: encoding: [0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 0ae183f504875..dd8f465dc0a5c 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -45,113 +45,6 @@ v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 -// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 -// W32: [0x05,0x69,0x20,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: [0x05,0x6b,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: [0x05,0x7b,0x20,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, 
v2, s[6:7] row_ror:15 -// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] - v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -259,90 +152,6 @@ v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 
row_half_mirror -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_add_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_add_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: 
[0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_add_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_add_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -511,48 +320,6 @@ v_add_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 
row_mirror -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_add_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_add_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -679,48 +446,6 @@ v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_and_b32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_and_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_and_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_and_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -805,48 +530,6 @@ v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ct 
v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_ashrrev_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_ashrrev_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -1057,216 +740,6 @@ v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_bfrev_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_bfrev_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ceil_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ceil_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_ceil_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_cls_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cls_i32_e64_dpp v5, v1 
quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cls_i32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cls_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cls_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cls_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 
row_half_mirror -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_clz_i32_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_clz_i32_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_clz_i32_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - v_cndmask_b16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1374,301 +847,68 @@ v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_m v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: 
[0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_mirror -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_half_mirror -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shl:1 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shl:15 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shr:1 -// W32: 
[0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shr:15 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_ror:1 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s105 row_ror:15 -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_mirror -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_half_mirror -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:1 -// W64: 
[0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:15 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:1 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:15 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:1 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:15 -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x03,0x01,0xd5,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] - -v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cos_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cos_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cos_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cos_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cos_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_cos_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cos_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cos_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cos_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cos_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cos_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ctz_i32_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - 
-v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - 
-v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 // GFX11: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] @@ -1676,902 +916,104 @@ v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 // GFX11: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: 
[0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f16_i16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f16_i16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: 
[0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f16_u16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 
- -v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_shl:1 -// GFX11: 
[0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_i32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_i32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
- -v_cvt_f32_u32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_u32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_u32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 
row_shr:15 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:1 -// 
GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:15 -// GFX11: 
[0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf 
-// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - 
-v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_floor_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_flr_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_i16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_i32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_i32_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_i32_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_i32_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - 
-v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: 
[0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_cvt_norm_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_cvt_norm_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 
-v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_cvt_norm_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_cvt_norm_u16_f16_e64_dpp v255, -|v255| row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: 
[0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 +// GFX11: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 +// GFX11: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 +// GFX11: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| 
clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -2741,48 +1183,6 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -3077,216 +1477,6 @@ v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: 
[0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - 
-v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_rpi_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: 
[0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_u16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_u32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3317,311 +1507,17 @@ v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 // GFX11: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] - -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_exp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_exp_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: 
[0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_exp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_exp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_exp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_exp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_exp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_exp_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: 
[0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_exp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_exp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_ffbh_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ffbh_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_ror:1 -// 
GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ffbh_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_ffbh_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_ffbh_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_ffbh_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ffbh_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ffbh_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_ffbh_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_ffbh_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_ffbl_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_ffbl_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_ffbl_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - 
-v_ffbl_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_ffbl_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_floor_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_floor_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_floor_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_floor_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_floor_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_floor_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_floor_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_floor_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_floor_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_floor_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_floor_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_floor_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_floor_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] +v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_floor_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3707,300 +1603,6 @@ v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -v_fract_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_fract_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_fract_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 
row_ror:1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_fract_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_fract_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_fract_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_fract_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - 
-v_fract_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_fract_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_fract_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_fract_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_frexp_exp_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:15 
row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_frexp_exp_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] - -v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_frexp_mant_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_frexp_mant_f16_e64_dpp v5, v1 mul:4 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_frexp_mant_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - 
-v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] - -v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] - -v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] - v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4085,90 +1687,6 @@ v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_c v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_log_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_log_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_log_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_log_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_log_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_log_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_log_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_log_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_log_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_log_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4295,48 +1813,6 @@ v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ct v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - 
-v_lshlrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_lshlrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -4379,48 +1855,6 @@ v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ct v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 
row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4925,90 +2359,6 @@ v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_ror:1 -// 
GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_max_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: 
[0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_max_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_max_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -5051,48 +2401,6 @@ v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: 
[0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_max_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_max_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_max_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -5135,48 +2443,6 @@ v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: 
[0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_max_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5918,104 +3184,20 @@ v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 // GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] - -v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 
fi:1 -// GFX11: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_min_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_min_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -v_min_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -v_min_f32_e64_dpp v5, 
-v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] +v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] -v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -6059,48 +3241,6 @@ v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: 
[0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_min_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_min_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_min_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -6143,48 +3283,6 @@ v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: 
[0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_min_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_min_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_min_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -6353,216 +3451,6 @@ v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 boun v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_mov_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: 
[0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_movreld_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: 
[0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_movreld_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_movrels_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_movrels_b32_e64_dpp 
v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_movrels_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: 
[0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: 
[0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_movrelsd_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -6605,300 +3493,6 @@ v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_c v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - 
-v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: 
[0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: 
[0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 
row_ror:15 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_mul_hi_i32_i24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - 
-v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_mul_hi_u32_u24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_mul_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_mul_i32_i24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 
mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_mul_legacy_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -6941,48 +3535,6 @@ v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - 
-v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_mul_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_mul_u32_u24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -7013,101 +3565,17 @@ v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 // GFX11: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_not_b16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: 
[0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_not_b16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_not_b16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_not_b16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_not_b16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_not_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_not_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_not_b32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_half_mirror -// GFX11: 
[0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_not_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_not_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_not_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] +v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_not_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -7193,48 +3661,6 @@ v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_or_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_or_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 
-// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_or_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -7319,300 +3745,6 @@ v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_rcp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rcp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rcp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rcp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rcp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rcp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rcp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rcp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rndne_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rndne_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rndne_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rndne_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rndne_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: 
[0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rndne_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rndne_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rndne_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rsq_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rsq_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rsq_f16_e64_dpp 
v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rsq_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rsq_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_rsq_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_rsq_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_rsq_f32_e64_dpp 
v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_rsq_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -7745,358 +3877,41 @@ v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, 
v2, vcc_hi row_shl:15 -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] - -v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 
row_shr:1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] - -v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] - -v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] - -v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sin_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sin_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sin_f16_e64_dpp 
v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sin_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_sin_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_sin_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sin_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sin_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 
row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sin_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_sin_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_sin_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_sqrt_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sqrt_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sqrt_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_sqrt_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_sqrt_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_sqrt_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_sqrt_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 -// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 -// W32: 
[0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 -// W32: [0x05,0x69,0x21,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: [0x05,0x6b,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: [0x05,0x7b,0x21,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror -// W64: 
[0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 -// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 -// W64: 
[0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] -v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -8205,90 +4020,6 @@ 
v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_sub_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_sub_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // 
GFX11: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -8415,155 +4146,6 @@ v_sub_nc_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctr v_sub_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_sub_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_sub_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 -// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 -// W32: 
[0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 -// W32: [0x05,0x69,0x22,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: [0x05,0x6b,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: [0x05,0x7b,0x22,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 -// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W64: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W64: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W64: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] -// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] - v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -8671,216 +4253,6 @@ v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_ma v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_subrev_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_subrev_f16_e64_dpp v5, 
v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_subrev_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - 
-v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: 
[0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_trunc_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_trunc_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_trunc_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: 
[0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - -v_trunc_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] - -v_trunc_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_mirror -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_half_mirror -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_shl:1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_shl:15 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_shr:1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_shr:15 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_ror:1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_ror:15 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] - -v_trunc_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] - -v_trunc_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] - -v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: 
[0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] - v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -8923,48 +4295,6 @@ v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_c v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_xnor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_xnor_b32_e64_dpp v5, v1, v2 row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_xnor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -9049,48 +4379,6 @@ v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: 
[0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_xor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] - -v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf // GFX11: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] @@ -9157,90 +4445,6 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX11: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 
row_ror:1 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - -v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_mirror -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: 
[0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] - -v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] - -v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] - v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf // GFX11: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s new file mode 100644 index 0000000000000..15c0cda5a4232 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop1.s @@ -0,0 +1,2815 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s + +v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_bfrev_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: 
[0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_bfrev_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ceil_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_ror:15 
+// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ceil_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_ceil_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_cls_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cls_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cls_i32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cls_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + 
+v_cls_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cls_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cls_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_clz_i32_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_clz_i32_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_clz_i32_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cos_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cos_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cos_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cos_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cos_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_cos_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cos_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cos_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cos_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cos_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cos_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 
quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ctz_i32_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_mirror +// GFX11: 
[0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_half_mirror +// GFX11: 
[0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f16_i16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f16_i16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 
+ +v_cvt_f16_u16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f16_u16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: 
[0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_i32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_i32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
+ +v_cvt_f32_u32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_u32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_u32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, 
v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + 
+v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + 
+v_cvt_floor_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_flr_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_i16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + 
+v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_i32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_mirror +// GFX11: 
[0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_i32_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_i32_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_i32_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: 
[0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 
row_shl:1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_norm_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 
row_shl:15 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_norm_u16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:1 +// GFX11: 
[0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_rpi_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: 
[0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_u16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + 
+v_cvt_u32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_u32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 
row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_exp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_exp_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_exp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_exp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_exp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_exp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_exp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_exp_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_exp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_exp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_ffbh_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ffbh_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ffbh_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_ffbh_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_ffbh_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_ffbh_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ffbh_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: 
[0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ffbh_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_ffbh_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_ffbh_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_ffbl_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_ffbl_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: 
[0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_ffbl_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_ffbl_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_ffbl_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_floor_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: 
[0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_floor_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_floor_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_floor_f32_e64_dpp v5, 
v1 row_shr:15 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_floor_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_floor_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_floor_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_fract_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_fract_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_fract_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + 
+v_fract_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_fract_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_fract_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_fract_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_fract_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
[0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_fract_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_fract_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_fract_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:15 
row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_frexp_exp_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + 
+v_frexp_exp_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_frexp_exp_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] + +v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_frexp_mant_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_frexp_mant_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_frexp_mant_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_log_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_log_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_log_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_log_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_log_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_log_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_log_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_log_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_log_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_log_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_mov_b32_e64_dpp v5, v1 row_mirror +// 
GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_movreld_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: 
[0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_movreld_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_movrels_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + 
+v_movrels_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_movrels_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: 
[0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_movrelsd_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_not_b16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_not_b16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_share:0 row_mask:0xf 
bank_mask:0xf +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_not_b16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_not_b16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_not_b16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_not_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_not_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_not_b32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_not_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_not_b32_e64_dpp v5, v1 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_not_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_rcp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rcp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rcp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rcp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rcp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rcp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rcp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rcp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 
quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rndne_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rndne_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_mirror +// GFX11: 
[0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rndne_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rndne_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rndne_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rndne_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + 
+v_rndne_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rndne_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rndne_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rsq_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rsq_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: 
[0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rsq_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rsq_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_rsq_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_rsq_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: 
[0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_rsq_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + 
+v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] + +v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x09,0x13] + +v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] + +v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sin_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sin_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
[0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sin_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_sin_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_sin_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sin_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sin_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sin_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_sin_f32_e64_dpp 
v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_sin_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_sqrt_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sqrt_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sqrt_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_sqrt_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_sqrt_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_sqrt_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_sqrt_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_trunc_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_trunc_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_trunc_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] + +v_trunc_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_trunc_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] + 
+v_trunc_f32_e64_dpp v5, v1 row_mirror +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_half_mirror +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_shl:1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_shl:15 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_shr:1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_shr:15 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_ror:1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_ror:15 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] + +v_trunc_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] + +v_trunc_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x09,0x13] + +v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s new file mode 100644 index 0000000000000..2e7d1f6df12d4 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16_from_vop2.s @@ -0,0 +1,1986 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc 
-arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 +// W32: 
[0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 +// W32: [0x05,0x06,0x20,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 +// W32: [0x05,0x69,0x20,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x20,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], 
v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, 
v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + +v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_add_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// 
GFX11: [0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_add_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_add_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_add_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_and_b32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_and_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_and_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: 
[0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_ashrrev_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_ashrrev_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_mirror +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_half_mirror +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shl:1 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shl:15 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shr:1 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_shr:15 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 row_ror:1 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s105 row_ror:15 +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: 
[0x05,0x01,0x01,0xd5,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_mirror +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x03,0x01,0xd5,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: 
[0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: 
[0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_fmac_f16_e64_dpp 
v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: 
[0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: 
[0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] + +v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] + +v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] + +v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: 
[0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_lshlrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_lshlrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: 
[0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
[0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_max_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_max_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_max_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
[0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_min_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: 
[0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_min_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_min_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_min_u32_e64_dpp 
v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_min_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_min_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: 
[0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_f16_e64_dpp 
v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: 
[0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:15 
+// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mul_hi_i32_i24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp 
v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mul_hi_u32_u24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, 
v2 row_ror:15 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mul_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mul_i32_i24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + 
+v_mul_legacy_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_mul_legacy_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: 
[0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_mul_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_mul_u32_u24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: 
[0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_or_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_or_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[3,2,1,0] +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 +// W32: [0x05,0x06,0x21,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 +// W32: [0x05,0x69,0x21,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x21,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror +// W64: 
[0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + +v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: 
[0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_sub_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_sub_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 
quad_perm:[3,2,1,0] +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 quad_perm:[0,1,2,3] +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_mirror +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_half_mirror +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:1 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shl:15 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:1 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_shr:15 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 row_ror:1 +// W32: [0x05,0x06,0x22,0xd5,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 row_ror:15 +// W32: [0x05,0x69,0x22,0xd5,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf +// W32: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: [0x05,0x6b,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: [0x05,0x7b,0x22,0xd5,0xfa,0x04,0xee,0x01,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 
+// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 +// W64: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf +// W64: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + 
+v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: 
[0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] + +v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x09,0x13] + +v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 
quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_xnor_b32_e64_dpp v5, 
v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_xnor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_xnor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_mirror +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX11: 
[0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] + +v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: [0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index e9e5321a0adea..5742817e63801 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -39,45 +39,6 @@ v_add3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x55,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x06,0x20,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x69,0x20,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6b,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: [0x05,0x7b,0x20,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: [0x05,0x7a,0x20,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: 
[0xff,0xfc,0x20,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] - v_add_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -117,30 +78,6 @@ v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0xfc,0x00,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_add_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x32,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x32,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_add_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x03,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x03,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -204,15 +141,6 @@ v_add_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x03,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 
-v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x25,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x25,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -288,15 +216,6 @@ v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x62,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1b,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -342,15 +261,6 @@ v_ashrrev_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_ashrrev_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x3a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1a,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: 
[0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -477,57 +387,6 @@ v_bfm_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x1d,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xdc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xdc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ceil_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_ceil_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa2,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa2,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
- -v_cls_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_clz_i32_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -567,78 +426,6 @@ v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] -v_cndmask_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: [0x05,0x02,0x01,0xd5,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] -// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: [0x05,0x02,0x01,0xd5,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x03,0x01,0xd5,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] - -v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cos_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cos_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cos_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xe1,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_cos_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cos_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cos_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: 
[0x05,0x00,0xb6,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cos_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xb6,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -783,210 +570,6 @@ v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x87,0x0e,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8a,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x8a,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f16_i16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f16_i16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f16_i16_e64_dpp v255, v255 
clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0xd1,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f16_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f16_u16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f16_u16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0xd0,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8b,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x8b,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_i32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_i32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x85,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x85,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_u32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_u32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x86,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x86,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x91,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x91,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x92,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x92,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x93,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: 
[0xff,0x80,0x93,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x94,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x94,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_floor_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0x8d,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_flr_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8d,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_flr_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0x8d,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd3,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - 
-v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x88,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x88,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_i32_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0x8c,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_norm_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0xe3,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe4,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_norm_u16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0xe4,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_off_f32_i4_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// 
GFX11: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x8e,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] - v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1032,18 +615,6 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] - -v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x2f,0xd5,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] - -v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] - v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1149,54 +720,6 @@ v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - 
-v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] - -v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x2f,0xd5,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] - -v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x8c,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_rpi_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0x8c,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd2,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x87,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x87,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 
-// GFX11: [0xff,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1233,81 +756,6 @@ v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_exp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_exp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd8,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_exp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd8,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_exp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_exp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_exp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_ffbh_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbh_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbh_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_ffbh_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbh_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbh_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_ffbl_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xdb,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xdb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_floor_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_floor_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa4,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_floor_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa4,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1380,84 +828,6 @@ v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| 
mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x87,0x13,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_fract_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_fract_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_fract_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xdf,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_fract_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xdf,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_fract_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_fract_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_fract_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xda,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_exp_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0xda,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xbf,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - 
-v_frexp_exp_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x01,0xbf,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] - -v_frexp_mant_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_mant_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_frexp_mant_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd9,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_frexp_mant_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xc0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] - -v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x3b,0xd5,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] - -v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0x3b,0xd5,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] - v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1506,30 +876,6 @@ 
v_lerp_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x15,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_log_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_log_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_log_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd7,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_log_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_log_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_log_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa7,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1611,15 +957,6 @@ v_lshlrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshlrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x38,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x18,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: 
[0xff,0x00,0x18,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -1629,15 +966,6 @@ v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x39,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x19,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x19,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2058,30 +1386,6 @@ v_max3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x1e,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x39,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x39,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_max_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x10,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x10,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2091,15 +1395,6 @@ v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x0a,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x12,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x12,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2109,15 +1404,6 @@ v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x14,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x14,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2700,30 +1986,6 @@ 
v_min3_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x1b,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x3a,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x3a,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_min_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x0f,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x0f,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2733,15 +1995,6 @@ v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x0c,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x11,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 
-// GFX11: [0xff,0x00,0x11,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -2751,15 +2004,6 @@ v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x13,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x13,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2904,51 +2148,6 @@ v_minmax_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x63,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_movrels_b32_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2985,81 +2184,6 @@ v_msad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x39,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x07,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - 
-v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x07,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x35,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x35,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x08,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x08,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x0a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x0a,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x0c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - 
-v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x0c,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x09,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x09,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_mul_legacy_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_mul_legacy_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x07,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x07,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3069,15 +2193,6 @@ v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x05,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x0b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x0b,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: 
[0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3114,24 +2229,6 @@ v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x87,0x18,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_not_b16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_not_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3177,15 +2274,6 @@ v_or_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x63,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1c,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_pack_b32_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x11,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3234,90 +2322,6 @@ 
v_perm_b32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x44,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_rcp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rcp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rcp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd4,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd4,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rcp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rcp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rcp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xaa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xaa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rcp_iflag_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xab,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xab,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rndne_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rndne_f16_e64_dpp v5, v1 
mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rndne_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xde,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xde,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rndne_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rndne_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rndne_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa3,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rsq_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rsq_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rsq_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd6,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd6,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_rsq_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_rsq_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xae,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: 
[0xff,0x81,0xae,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3462,102 +2466,6 @@ v_sad_u8_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x22,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] - -v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sin_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_sin_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xe0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_sin_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xe0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sin_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_sin_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_sin_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xb5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_sqrt_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
[0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sqrt_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_sqrt_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xd5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xd5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_sqrt_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_sqrt_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xb3,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xb3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x06,0x21,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x69,0x21,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6b,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: 
[0x05,0x7b,0x21,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: [0x05,0x7a,0x21,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0xfc,0x21,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] - v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3597,30 +2505,6 @@ v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0xfc,0x01,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 
dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x33,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x33,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x04,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x04,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_sub_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x0e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3648,54 +2532,6 @@ v_sub_nc_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_sub_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x80,0x04,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x26,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x26,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x06,0x22,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x69,0x22,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// W32: [0x05,0x6b,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W32: [0x05,0x7b,0x22,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] -// W64: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 -// W64: [0x05,0x7a,0x22,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0xfc,0x22,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] - v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction @@ -3735,63 +2571,6 @@ v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0xfc,0x02,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x34,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x34,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x05,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x05,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x27,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x80,0x27,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - -v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - 
-v_trunc_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_trunc_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xdd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xdd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - -v_trunc_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] - -v_trunc_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] - -v_trunc_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0xa1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] - -v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x81,0xa1,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] - v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3828,15 +2607,6 @@ v_xad_u32_e64_dpp v5, v1, v2, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x00,0x45,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1e,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3882,15 +2652,6 @@ v_xor_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // 
GFX11: [0xff,0x00,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x00,0x1d,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x00,0x1d,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] - v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x58,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -3957,30 +2718,6 @@ v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX11: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - -v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] - -v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] - -v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: [0x05,0x02,0x2b,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] - -v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: 
[0xff,0x83,0x2b,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] - v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: [0x05,0x78,0x53,0xd6,0xe9,0x04,0xfe,0x01,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s new file mode 100644 index 0000000000000..f8478673e4fc2 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop1.s @@ -0,0 +1,718 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdc,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdc,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ceil_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_ceil_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa2,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa2,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cls_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_clz_i32_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cos_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cos_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cos_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xe1,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_cos_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cos_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cos_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb6,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cos_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xb6,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_ctz_i32_b32_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8a,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x8a,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f16_i16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f16_i16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0xd1,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f16_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f16_u16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f16_u16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: 
[0xff,0x80,0xd0,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8b,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x8b,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_i32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_i32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x85,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x85,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_u32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_u32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x86,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x86,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + 
+v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x91,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x91,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x92,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x92,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x93,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x93,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x94,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x94,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + 
+v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_floor_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x8d,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_flr_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8d,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_flr_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x8d,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd3,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x88,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x88,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_i32_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: 
[0xff,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x8c,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_norm_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0xe3,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe4,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_norm_u16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0xe4,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8e,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x8e,0xd5,0xe9,0x00,0x00,0x18,0xff,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_rpi_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x8c,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_rpi_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0x8c,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd2,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x87,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x87,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_exp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_exp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd8,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_exp_f16_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd8,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_exp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_exp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_exp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_ffbh_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbh_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbh_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_ffbh_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbh_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbh_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbl_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ffbl_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX11: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdb,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdb,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_floor_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_floor_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa4,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_floor_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa4,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_fract_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_fract_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_fract_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdf,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_fract_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdf,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_fract_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_fract_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_fract_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + 
+v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xda,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_exp_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0xda,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xbf,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_exp_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x01,0xbf,0xd5,0xe9,0x00,0x00,0x20,0xff,0x00,0x00,0x00] + +v_frexp_mant_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_mant_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_frexp_mant_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd9,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd9,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_frexp_mant_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xc0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_log_f16_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_log_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_log_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd7,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_log_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_log_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_log_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa7,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa7,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrels_b32_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_not_b16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_not_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_rcp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rcp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX11: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rcp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd4,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd4,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rcp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rcp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rcp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xaa,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xaa,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xab,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xab,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rndne_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rndne_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rndne_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xde,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xde,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 
+ +v_rndne_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rndne_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rndne_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa3,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rsq_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rsq_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rsq_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd6,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd6,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_rsq_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_rsq_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xae,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xae,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: 
[0xff,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0xff,0x00,0x00,0x00] + +v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sin_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_sin_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xe0,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_sin_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xe0,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sin_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_sin_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xb5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_sin_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xb5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_sqrt_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sqrt_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_sqrt_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xd5,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xd5,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_sqrt_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sqrt_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// 
GFX11: [0x05,0x00,0xb3,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xb3,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_trunc_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_trunc_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xdd,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xdd,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] + +v_trunc_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] + +v_trunc_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] + +v_trunc_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0xa1,0xd5,0xea,0x00,0x00,0x10,0x01,0x77,0x39,0x05] + +v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0xa1,0xd5,0xe9,0x00,0x00,0x38,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s new file mode 100644 index 0000000000000..6e1a65d7d3210 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8_from_vop2.s @@ -0,0 +1,550 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck 
--check-prefixes=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x20,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x20,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x20,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x20,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x20,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] + +v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_add_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x32,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x32,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x03,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x03,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x25,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x25,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_and_b32_e64_dpp v5, v1, 
v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1b,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1a,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_cndmask_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x02,0x01,0xd5,0xea,0x04,0xee,0x21,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: 
[0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x02,0x01,0xd5,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x03,0x01,0xd5,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x2f,0xd5,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x2f,0xd5,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + 
+v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x2f,0xd5,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x36,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x36,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x2b,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x2b,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] + +v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x3b,0xd5,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] + +v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x81,0x3b,0xd5,0xe9,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] + +v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + 
+v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x18,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x18,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x19,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x19,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x39,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x39,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_max_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x10,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x10,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_i32_e64_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x12,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x12,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x14,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x14,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x3a,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x3a,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_min_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x0f,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x0f,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x00,0x11,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x11,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x13,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x13,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x07,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x07,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x35,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x35,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
[0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x08,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x08,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x0a,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x0c,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x09,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x09,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_mul_legacy_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x02,0x07,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x07,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x0b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x0b,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1c,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_sub_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x21,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x21,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x21,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x21,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x21,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] + +v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x33,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x33,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x04,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x04,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x26,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x26,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_subrev_co_ci_u32_e64_dpp v5, s6, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x22,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s105, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x22,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc_hi, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x6b,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, ttmp15, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: [0x05,0x7b,0x22,0xd5,0xea,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, 
s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x7a,0x22,0xd5,0xea,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0xfc,0x22,0xd5,0xe9,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] + +v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x02,0x34,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x34,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] + +v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: 
[0x05,0x02,0x05,0xd5,0xea,0x04,0x02,0x30,0x01,0x77,0x39,0x05] + +v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x83,0x05,0xd5,0xe9,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x27,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x80,0x27,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1e,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] + +v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: [0x05,0x00,0x1d,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: [0xff,0x00,0x1d,0xd5,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s new file mode 100644 index 0000000000000..a67cb0bf4cf85 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1.s @@ -0,0 +1,3508 @@ +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX11 %s + +v_bfrev_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00] + +v_bfrev_b32_e64 v5, v255 +// GFX11: encoding: 
[0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00] + +v_bfrev_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00] + +v_bfrev_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00] + +v_bfrev_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_ceil_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00] + +v_ceil_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00] + +v_ceil_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00] + +v_ceil_f16_e64 
v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00] + +v_ceil_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08] + +v_ceil_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10] + +v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_ceil_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00] + +v_ceil_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00] + +v_ceil_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00] + +v_ceil_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08] + +v_ceil_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10] + +v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 
+// GFX11: encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_ceil_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00] + +v_ceil_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00] + +v_ceil_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00] + +v_ceil_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08] + +v_ceil_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30] + +v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cls_i32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] + +v_cls_i32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] + +v_cls_i32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] + +v_cls_i32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] + +v_cls_i32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] + +v_cls_i32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] + +v_cls_i32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] + +v_cls_i32_e64 v5, m0 +// GFX11: encoding: 
[0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] + +v_cls_i32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] + +v_cls_i32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] + +v_cls_i32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] + +v_cls_i32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] + +v_cls_i32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] + +v_cls_i32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] + +v_cls_i32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_clz_i32_u32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] + +v_clz_i32_u32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] + +v_clz_i32_u32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v5, src_scc +// GFX11: encoding: 
[0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] + +v_clz_i32_u32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cos_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] + +v_cos_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00] + +v_cos_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00] + +v_cos_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00] + +v_cos_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00] + +v_cos_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00] + +v_cos_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00] + +v_cos_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00] + +v_cos_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00] + +v_cos_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00] + +v_cos_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00] + +v_cos_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00] + +v_cos_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08] + +v_cos_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10] + +v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_cos_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00] + +v_cos_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00] + +v_cos_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00] + +v_cos_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00] + +v_cos_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00] + 
+v_cos_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00] + +v_cos_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00] + +v_cos_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00] + +v_cos_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00] + +v_cos_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00] + +v_cos_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00] + +v_cos_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00] + +v_cos_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08] + +v_cos_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10] + +v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_ctz_i32_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] + +v_ctz_i32_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] + +v_ctz_i32_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] + 
+v_ctz_i32_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] + +v_ctz_i32_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_f16_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f16_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f16_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f16_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f16_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_cvt_f16_i16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f16_i16_e64 v5, v255 +// GFX11: encoding: 
[0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f16_i16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f16_i16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xff,0x00,0x00,0x08,0x00,0x38,0x00,0x00] + +v_cvt_f16_i16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f16_i16_e64 v255, 0xfe0b clamp div:2 +// GFX11: encoding: [0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] + +v_cvt_f16_u16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f16_u16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f16_u16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00] + 
+v_cvt_f16_u16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f16_u16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xff,0x00,0x00,0x08,0x00,0x38,0x00,0x00] + +v_cvt_f16_u16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f16_u16_e64 v255, 0xfe0b clamp div:2 +// GFX11: encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] + +v_cvt_f32_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: 
[0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_cvt_f32_f64_e64 v5, v[1:2] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_f64_e64 v5, v[254:255] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00] + +v_cvt_f32_f64_e64 v5, s[2:3] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, s[104:105] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, vcc +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, exec +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_f64_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_f64_e64 v5, -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30] + +v_cvt_f32_f64_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_i32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_i32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_i32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, vcc_hi +// GFX11: encoding: 
[0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_i32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_i32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_i32_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_u32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_u32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_u32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, -1 +// 
GFX11: encoding: [0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_u32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_u32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_u32_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte0_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_ubyte0_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_ubyte0_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_ubyte0_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte1_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00] + 
+v_cvt_f32_ubyte1_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_ubyte1_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_ubyte1_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_ubyte1_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte2_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00] + 
+v_cvt_f32_ubyte2_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_ubyte2_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_ubyte2_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_ubyte2_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f32_ubyte3_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00] + 
+v_cvt_f32_ubyte3_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f32_ubyte3_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f32_ubyte3_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f32_ubyte3_e64 v255, 0xaf123456 clamp div:2 +// GFX11: encoding: [0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f64_f32_e64 v[5:6], v1 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], v255 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], s1 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], s105 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], vcc_lo +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], vcc_hi +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], ttmp15 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], m0 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], exec_lo +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], exec_hi +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f64_f32_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f64_f32_e64 v[5:6], src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f64_f32_e64 v[254:255], -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_cvt_f64_i32_e64 v[5:6], v1 +// GFX11: 
encoding: [0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], v255 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], s1 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], s105 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], vcc_lo +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], vcc_hi +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], ttmp15 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], m0 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], exec_lo +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], exec_hi +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f64_i32_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f64_i32_e64 v[5:6], src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f64_i32_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_f64_u32_e64 v[5:6], v1 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], v255 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], s1 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], s105 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], vcc_lo +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], vcc_hi +// 
GFX11: encoding: [0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], ttmp15 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], m0 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], exec_lo +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], exec_hi +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_f64_u32_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_f64_u32_e64 v[5:6], src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_f64_u32_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_cvt_floor_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] + 
+v_cvt_floor_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_floor_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_flr_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_flr_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: 
[0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_i16_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_i16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_i16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_i16_f16_e64 v255, -|0xfe0b| clamp +// GFX11: encoding: [0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cvt_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, vcc_hi +// 
GFX11: encoding: [0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_i32_f32_e64 v255, -|0xaf123456| clamp +// GFX11: encoding: [0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_i32_f64_e64 v5, v[1:2] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_i32_f64_e64 v5, v[254:255] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00] + +v_cvt_i32_f64_e64 v5, s[2:3] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, s[104:105] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, vcc +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, exec +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_i32_f64_e64 v5, -|src_scc| +// GFX11: encoding: [0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20] + 
+v_cvt_i32_f64_e64 v255, 0xaf123456 clamp +// GFX11: encoding: [0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_i32_i16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_i32_i16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_i32_i16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_i32_i16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_i32_i16_e64 v255, 0xfe0b +// GFX11: encoding: [0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 
v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_nearest_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_norm_i16_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, exec_lo +// GFX11: encoding: 
[0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_norm_i16_f16_e64 v255, -|0xfe0b| +// GFX11: encoding: [0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v5, src_scc +// GFX11: encoding: 
[0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_norm_u16_f16_e64 v255, -|0xfe0b| +// GFX11: encoding: [0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_off_f32_i4_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08] + +v_cvt_off_f32_i4_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10] + +v_cvt_off_f32_i4_e64 v255, 0x4f clamp div:2 +// GFX11: encoding: [0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, s105 +// GFX11: 
encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_rpi_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_u16_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_u16_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_u16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00] 
+ +v_cvt_u16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_u16_f16_e64 v255, -|0xfe0b| clamp +// GFX11: encoding: [0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cvt_u32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_u32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_u32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_u32_f32_e64 v255, -|0xaf123456| clamp +// GFX11: encoding: 
[0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cvt_u32_f64_e64 v5, v[1:2] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_u32_f64_e64 v5, v[254:255] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00] + +v_cvt_u32_f64_e64 v5, s[2:3] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, s[104:105] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, vcc +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, exec +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00] + +v_cvt_u32_f64_e64 v5, -|src_scc| +// GFX11: encoding: [0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20] + +v_cvt_u32_f64_e64 v255, 0xaf123456 clamp +// GFX11: encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_u32_u16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] + +v_cvt_u32_u16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] + +v_cvt_u32_u16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00] + 
+v_cvt_u32_u16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_cvt_u32_u16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00] + +v_cvt_u32_u16_e64 v255, 0xfe0b +// GFX11: encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_exp_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] + +v_exp_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00] + +v_exp_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00] + +v_exp_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00] + +v_exp_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00] + +v_exp_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00] + +v_exp_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00] + +v_exp_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00] + +v_exp_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00] + +v_exp_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00] + +v_exp_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00] + +v_exp_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00] + +v_exp_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08] + +v_exp_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10] + +v_exp_f16_e64 v255, -|0xfe0b| 
clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_exp_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00] + +v_exp_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00] + +v_exp_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00] + +v_exp_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00] + +v_exp_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00] + +v_exp_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00] + +v_exp_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00] + +v_exp_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00] + +v_exp_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00] + +v_exp_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00] + +v_exp_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00] + +v_exp_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00] + +v_exp_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08] + +v_exp_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10] + +v_exp_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_ffbh_i32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] + +v_ffbh_i32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] + +v_ffbh_i32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, vcc_hi +// GFX11: encoding: 
[0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] + +v_ffbh_i32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] + +v_ffbh_i32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_ffbh_u32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] + +v_ffbh_u32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] + +v_ffbh_u32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, 0.5 +// 
GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] + +v_ffbh_u32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] + +v_ffbh_u32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_ffbl_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] + +v_ffbl_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] + +v_ffbl_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] + +v_ffbl_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] + +v_ffbl_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_floor_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00] + +v_floor_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00] + +v_floor_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00] + +v_floor_f16_e64 v5, s105 +// GFX11: encoding: 
[0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00] + +v_floor_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00] + +v_floor_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00] + +v_floor_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00] + +v_floor_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00] + +v_floor_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00] + +v_floor_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00] + +v_floor_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00] + +v_floor_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00] + +v_floor_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08] + +v_floor_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10] + +v_floor_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_floor_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00] + +v_floor_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00] + +v_floor_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00] + +v_floor_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00] + +v_floor_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00] + +v_floor_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00] + +v_floor_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00] + +v_floor_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00] + +v_floor_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00] + +v_floor_f32_e64 v5, exec_hi +// GFX11: encoding: 
[0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00] + +v_floor_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00] + +v_floor_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00] + +v_floor_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08] + +v_floor_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10] + +v_floor_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_floor_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00] + +v_floor_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00] + +v_floor_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00] + +v_floor_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08] + +v_floor_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30] + +v_floor_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_fract_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] + +v_fract_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] + +v_fract_f16_e64 v5, s1 +// GFX11: encoding: 
[0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] + +v_fract_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] + +v_fract_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] + +v_fract_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] + +v_fract_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] + +v_fract_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] + +v_fract_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] + +v_fract_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] + +v_fract_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] + +v_fract_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] + +v_fract_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] + +v_fract_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] + +v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_fract_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] + +v_fract_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] + +v_fract_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] + +v_fract_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] + +v_fract_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] + +v_fract_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] + +v_fract_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] + +v_fract_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] + +v_fract_f32_e64 v5, exec_lo +// GFX11: encoding: 
[0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00] + +v_fract_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00] + +v_fract_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00] + +v_fract_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00] + +v_fract_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08] + +v_fract_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10] + +v_fract_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_fract_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00] + +v_fract_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00] + +v_fract_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00] + +v_fract_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08] + +v_fract_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30] + +v_fract_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_frexp_exp_i16_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, v255 
+// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00] + +v_frexp_exp_i16_f16_e64 v255, -|0xfe0b| +// GFX11: encoding: [0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00] + 
+v_frexp_exp_i32_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00] + +v_frexp_exp_i32_f32_e64 v255, -|0xaf123456| +// GFX11: encoding: [0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_frexp_exp_i32_f64_e64 v5, v[1:2] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, v[254:255] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, s[2:3] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, s[104:105] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, vcc +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, exec +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00] + +v_frexp_exp_i32_f64_e64 v5, -|src_scc| +// GFX11: encoding: 
[0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20] + +v_frexp_exp_i32_f64_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_frexp_mant_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_mant_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00] + +v_frexp_mant_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_mant_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08] + +v_frexp_mant_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10] + +v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_frexp_mant_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_mant_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00] + +v_frexp_mant_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, s105 +// GFX11: 
encoding: [0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_mant_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08] + +v_frexp_mant_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10] + +v_frexp_mant_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_frexp_mant_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00] + 
+v_frexp_mant_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00] + +v_frexp_mant_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08] + +v_frexp_mant_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30] + +v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_log_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00] + +v_log_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00] + +v_log_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00] + +v_log_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00] + +v_log_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00] + +v_log_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00] + +v_log_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00] + +v_log_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00] + +v_log_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00] + +v_log_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00] + +v_log_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00] + +v_log_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00] + +v_log_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08] + +v_log_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10] + +v_log_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_log_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00] + +v_log_f32_e64 v5, v255 +// GFX11: encoding: 
[0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00] + +v_log_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00] + +v_log_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00] + +v_log_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00] + +v_log_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00] + +v_log_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00] + +v_log_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00] + +v_log_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00] + +v_log_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00] + +v_log_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00] + +v_log_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00] + +v_log_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08] + +v_log_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10] + +v_log_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_mov_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00] + +v_mov_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00] + +v_mov_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00] + +v_mov_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00] + +v_mov_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00] + +v_mov_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00] + +v_mov_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00] + +v_mov_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00] + +v_mov_b32_e64 v5, 
exec_lo +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00] + +v_mov_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00] + +v_mov_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00] + +v_mov_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00] + +v_mov_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00] + +v_mov_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00] + +v_mov_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_movreld_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00] + +v_movreld_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] + +v_movreld_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00] + +v_movreld_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00] + +v_movreld_b32_e64 v255, 0xaf123456 +// GFX11: 
encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_movrels_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00] + +v_movrels_b32_e64 v255, v255 +// GFX11: encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] + +v_movrelsd_2_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00] + +v_movrelsd_2_b32_e64 v255, v255 +// GFX11: encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] + +v_movrelsd_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00] + +v_movrelsd_b32_e64 v255, v255 +// GFX11: encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] + +v_nop_e64 +// GFX11: encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00] + +v_not_b16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00] + +v_not_b16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00] + +v_not_b16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00] + +v_not_b16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00] + +v_not_b16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00] + +v_not_b16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00] + +v_not_b16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00] + +v_not_b16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00] + +v_not_b16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00] + +v_not_b16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00] + +v_not_b16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00] + +v_not_b16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00] + +v_not_b16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x00,0x38,0x00,0x00] + +v_not_b16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00] + 
+v_not_b16_e64 v255, 0xfe0b +// GFX11: encoding: [0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_not_b32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00] + +v_not_b32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00] + +v_not_b32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00] + +v_not_b32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00] + +v_not_b32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00] + +v_not_b32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00] + +v_not_b32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00] + +v_not_b32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00] + +v_not_b32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00] + +v_not_b32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00] + +v_not_b32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00] + +v_not_b32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00] + +v_not_b32_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00] + +v_not_b32_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00] + +v_not_b32_e64 v255, 0xaf123456 +// GFX11: encoding: [0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_pipeflush_e64 +// GFX11: encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00] + +v_rcp_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00] + +v_rcp_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, vcc_lo +// GFX11: encoding: 
[0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00] + +v_rcp_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08] + +v_rcp_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10] + +v_rcp_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_rcp_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00] + +v_rcp_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00] + +v_rcp_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, -1 
+// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00] + +v_rcp_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08] + +v_rcp_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10] + +v_rcp_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_rcp_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00] + +v_rcp_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00] + +v_rcp_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00] + +v_rcp_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08] + +v_rcp_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30] + +v_rcp_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_rcp_iflag_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, vcc_lo +// 
GFX11: encoding: [0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00] + +v_rcp_iflag_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08] + +v_rcp_iflag_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10] + +v_rcp_iflag_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_rndne_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00] + +v_rndne_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00] + +v_rndne_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00] + 
+v_rndne_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00] + +v_rndne_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08] + +v_rndne_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10] + +v_rndne_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_rndne_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00] + +v_rndne_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00] + +v_rndne_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00] + +v_rndne_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08] + +v_rndne_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10] + +v_rndne_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_rndne_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: 
[0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00] + +v_rndne_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00] + +v_rndne_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00] + +v_rndne_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08] + +v_rndne_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30] + +v_rndne_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_rsq_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00] + +v_rsq_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00] + +v_rsq_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, exec_hi 
+// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00] + +v_rsq_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08] + +v_rsq_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10] + +v_rsq_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_rsq_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00] + +v_rsq_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00] + +v_rsq_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00] + +v_rsq_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08] + +v_rsq_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10] + +v_rsq_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_rsq_f64_e64 v[5:6], v[1:2] +// GFX11: 
encoding: [0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00] + +v_rsq_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00] + +v_rsq_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00] + +v_rsq_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08] + +v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30] + +v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_sat_pk_u8_i16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, exec_lo +// GFX11: encoding: 
[0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, 0.5 +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v5, src_scc +// GFX11: encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] + +v_sat_pk_u8_i16_e64 v255, 0xfe0b +// GFX11: encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_sin_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] + +v_sin_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00] + +v_sin_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00] + +v_sin_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00] + +v_sin_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00] + +v_sin_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00] + +v_sin_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00] + +v_sin_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00] + +v_sin_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00] + +v_sin_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00] + +v_sin_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00] + +v_sin_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00] + +v_sin_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08] + +v_sin_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10] + +v_sin_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: 
[0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_sin_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] + +v_sin_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00] + +v_sin_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00] + +v_sin_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00] + +v_sin_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00] + +v_sin_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00] + +v_sin_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00] + +v_sin_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00] + +v_sin_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00] + +v_sin_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00] + +v_sin_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00] + +v_sin_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00] + +v_sin_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] + +v_sin_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10] + +v_sin_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_sqrt_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00] + +v_sqrt_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00] + +v_sqrt_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00] + 
+v_sqrt_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00] + +v_sqrt_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08] + +v_sqrt_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10] + +v_sqrt_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_sqrt_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00] + +v_sqrt_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00] + +v_sqrt_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00] + +v_sqrt_f32_e64 v5, 0.5 mul:2 +// GFX11: 
encoding: [0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08] + +v_sqrt_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10] + +v_sqrt_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_sqrt_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00] + +v_sqrt_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08] + +v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30] + +v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] + +v_trunc_f16_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00] + +v_trunc_f16_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00] + +v_trunc_f16_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, vcc_hi +// GFX11: encoding: 
[0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, -1 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00] + +v_trunc_f16_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08] + +v_trunc_f16_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10] + +v_trunc_f16_e64 v255, -|0xfe0b| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_trunc_f32_e64 v5, v1 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00] + +v_trunc_f32_e64 v5, v255 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00] + +v_trunc_f32_e64 v5, s1 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, s105 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, vcc_lo +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, vcc_hi +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, ttmp15 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, m0 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, exec_lo +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, exec_hi +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, null +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, -1 +// GFX11: encoding: 
[0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00] + +v_trunc_f32_e64 v5, 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08] + +v_trunc_f32_e64 v5, src_scc mul:4 +// GFX11: encoding: [0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10] + +v_trunc_f32_e64 v255, -|0xaf123456| clamp div:2 +// GFX11: encoding: [0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] + +v_trunc_f64_e64 v[5:6], v[1:2] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00] + +v_trunc_f64_e64 v[5:6], v[254:255] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00] + +v_trunc_f64_e64 v[5:6], s[2:3] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], s[104:105] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], vcc +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], ttmp[14:15] +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], exec +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], null +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], -1 +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00] + +v_trunc_f64_e64 v[5:6], 0.5 mul:2 +// GFX11: encoding: [0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08] + +v_trunc_f64_e64 v[5:6], -|src_scc| mul:4 +// GFX11: encoding: [0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30] + +v_trunc_f64_e64 v[254:255], 0xaf123456 clamp div:2 +// GFX11: encoding: [0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s new file mode 100644 index 0000000000000..43c71617bb385 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop2.s @@ -0,0 +1,2187 @@ +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: 
not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, v255, src_scc, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0xff,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, s105, s105, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x6a,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x6b,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, m0, 0.5, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x7d,0xe0,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 
v5, s6, exec_hi, -1, s3 +// W32: encoding: [0x05,0x06,0x20,0xd5,0x7f,0x82,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s105, null, exec_hi, s105 +// W32: encoding: [0x05,0x69,0x20,0xd5,0x7c,0xfe,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo +// W32: encoding: [0x05,0x6a,0x20,0xd5,0xc1,0xfa,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi +// W32: encoding: [0x05,0x6b,0x20,0xd5,0xf0,0xd4,0xac,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 +// W32: encoding: [0x05,0x7b,0x20,0xd5,0xfd,0xf8,0xec,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] +// W64: 
encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] +// W64: encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] +// W64: encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc +// W64: encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] +// W64: encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_add_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_add_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] + +v_add_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00] + +v_add_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00] + +v_add_f16_e64 v5, s105, s105 +// GFX11: encoding: 
[0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00] + +v_add_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00] + +v_add_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_add_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00] + +v_add_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00] + +v_add_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00] + +v_add_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00] + +v_add_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00] + +v_add_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00] + +v_add_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48] + +v_add_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30] + +v_add_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_add_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] + +v_add_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00] + +v_add_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00] + +v_add_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00] + +v_add_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00] + +v_add_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00] + +v_add_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00] + +v_add_f32_e64 v5, 
exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00] + +v_add_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00] + +v_add_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00] + +v_add_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00] + +v_add_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48] + +v_add_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30] + +v_add_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_add_nc_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00] + +v_add_nc_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00] + +v_add_nc_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00] + +v_add_nc_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00] + +v_add_nc_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00] + +v_add_nc_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_add_nc_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00] + +v_add_nc_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00] + +v_add_nc_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00] + +v_add_nc_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00] + +v_add_nc_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00] + +v_add_nc_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00] + +v_add_nc_u32_e64 v5, 0.5, m0 +// GFX11: encoding: 
[0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00] + +v_add_nc_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00] + +v_add_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_and_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00] + +v_and_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00] + +v_and_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00] + +v_and_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00] + +v_and_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00] + +v_and_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_and_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00] + +v_and_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00] + +v_and_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00] + +v_and_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00] + +v_and_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00] + +v_and_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00] + +v_and_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00] + +v_and_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00] + +v_and_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_ashrrev_i32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] + +v_ashrrev_i32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00] + +v_ashrrev_i32_e64 v5, s1, s2 +// 
GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00] + +v_ashrrev_i32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00] + +v_ashrrev_i32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00] + +v_ashrrev_i32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_ashrrev_i32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00] + +v_ashrrev_i32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00] + +v_ashrrev_i32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00] + +v_ashrrev_i32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00] + +v_ashrrev_i32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00] + +v_ashrrev_i32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00] + +v_ashrrev_i32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00] + +v_ashrrev_i32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00] + +v_ashrrev_i32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cndmask_b32_e64 v5, v1, 0xaf123456, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, v255, src_scc, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, s105, s105, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, vcc_lo, v2, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x0e,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, vcc_hi, v255, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, m0, 0.5, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, exec_hi, -1, s3 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, null, exec_hi, s105 +// W32: encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, -1, m0, vcc_lo +// W32: encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc_hi +// W32: encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xac,0x41] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp15 +// W32: encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xec,0x21] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, v1, 0xaf123456, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, v255, src_scc, s[6:7] +// W64: encoding: 
[0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, s105, s105, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, vcc_lo, v2, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, vcc_hi, v255, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, m0, 0.5, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, exec_hi, -1, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, null, exec_hi, s[6:7] +// W64: encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, -1, m0, s[104:105] +// W64: encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc +// W64: encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v5, -|src_scc|, null, 
ttmp[14:15] +// W64: encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64 v255, -|0xaf123456|, -|vcc_hi|, null +// GFX11: encoding: [0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf] + +v_cvt_pk_rtz_f16_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pk_rtz_f16_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] + +v_cvt_pk_rtz_f16_f32_e64 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] + +v_cvt_pk_rtz_f16_f32_e64 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cvt_pkrtz_f16_f32_e64 v5, v1, 
v2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cvt_pkrtz_f16_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] + +v_cvt_pkrtz_f16_f32_e64 v5, 0.5, -m0 +// GFX11: encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] + +v_cvt_pkrtz_f16_f32_e64 v5, -src_scc, |vcc_lo| +// GFX11: encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] + +v_cvt_pkrtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_fmac_dx9_zero_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_dx9_zero_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_dx9_zero_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, s105, s105 +// GFX11: encoding: 
[0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_fmac_dx9_zero_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] + +v_fmac_dx9_zero_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] + +v_fmac_dx9_zero_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] + +v_fmac_dx9_zero_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] + +v_fmac_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] + +v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] + +v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_fmac_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] + +v_fmac_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] + +v_fmac_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] + +v_fmac_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_fmac_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: 
[0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] + +v_fmac_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] + +v_fmac_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] + +v_fmac_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] + +v_fmac_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] + +v_fmac_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] + +v_fmac_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] + +v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] + +v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_fmac_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00] + +v_fmac_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00] + +v_fmac_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00] + +v_fmac_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_fmac_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00] + +v_fmac_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00] + +v_fmac_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00] + +v_fmac_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00] + +v_fmac_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00] + +v_fmac_f32_e64 v5, -1, 
exec_hi +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00] + +v_fmac_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48] + +v_fmac_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30] + +v_fmac_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_fmac_legacy_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_legacy_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] + +v_fmac_legacy_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_fmac_legacy_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] + +v_fmac_legacy_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] + +v_fmac_legacy_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] + +v_fmac_legacy_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] + +v_fmac_legacy_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] + +v_fmac_legacy_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] + +v_fmac_legacy_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: 
encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_ldexp_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] + +v_ldexp_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00] + +v_ldexp_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00] + +v_ldexp_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00] + +v_ldexp_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00] + +v_ldexp_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_ldexp_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] + +v_ldexp_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] + +v_ldexp_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] + +v_ldexp_f16_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00] + +v_ldexp_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00] + +v_ldexp_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00] + +v_ldexp_f16_e64 v5, 0.5, m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08] + +v_ldexp_f16_e64 v5, src_scc, vcc_lo mul:4 +// GFX11: encoding: [0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10] + +v_ldexp_f16_e64 v255, -|0xfe0b|, vcc_hi clamp div:2 +// GFX11: encoding: [0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00] + +v_lshlrev_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] + +v_lshlrev_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00] + +v_lshlrev_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00] + +v_lshlrev_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00] 
+ +v_lshlrev_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00] + +v_lshlrev_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_lshlrev_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00] + +v_lshlrev_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00] + +v_lshlrev_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00] + +v_lshlrev_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00] + +v_lshlrev_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00] + +v_lshlrev_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00] + +v_lshlrev_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00] + +v_lshlrev_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00] + +v_lshlrev_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_lshrrev_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00] + +v_lshrrev_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00] + +v_lshrrev_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00] + +v_lshrrev_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00] + +v_lshrrev_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00] + +v_lshrrev_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_lshrrev_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00] + +v_lshrrev_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00] + 
+v_lshrrev_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00] + +v_lshrrev_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00] + +v_lshrrev_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00] + +v_lshrrev_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00] + +v_lshrrev_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00] + +v_lshrrev_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00] + +v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_max_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] + +v_max_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] + +v_max_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] + +v_max_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] + +v_max_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] + +v_max_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_max_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] + +v_max_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] + +v_max_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] + +v_max_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] + +v_max_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] + +v_max_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] + +v_max_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: 
[0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] + +v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] + +v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_max_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] + +v_max_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00] + +v_max_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00] + +v_max_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00] + +v_max_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00] + +v_max_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_max_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00] + +v_max_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00] + +v_max_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00] + +v_max_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00] + +v_max_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00] + +v_max_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00] + +v_max_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48] + +v_max_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30] + +v_max_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_max_i32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] + +v_max_i32_e64 v5, v255, v255 +// GFX11: encoding: 
[0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00] + +v_max_i32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00] + +v_max_i32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00] + +v_max_i32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00] + +v_max_i32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_max_i32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00] + +v_max_i32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00] + +v_max_i32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00] + +v_max_i32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00] + +v_max_i32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00] + +v_max_i32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00] + +v_max_i32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00] + +v_max_i32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00] + +v_max_i32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_max_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] + +v_max_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00] + +v_max_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00] + +v_max_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00] + +v_max_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00] + +v_max_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_max_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: 
[0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00] + +v_max_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00] + +v_max_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00] + +v_max_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00] + +v_max_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00] + +v_max_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00] + +v_max_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00] + +v_max_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00] + +v_max_u32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_min_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] + +v_min_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] + +v_min_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] + +v_min_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] + +v_min_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] + +v_min_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_min_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] + +v_min_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] + +v_min_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] + +v_min_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] + +v_min_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] + +v_min_f16_e64 v5, -1, exec_hi +// GFX11: encoding: 
[0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] + +v_min_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] + +v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] + +v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_min_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00] + +v_min_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00] + +v_min_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00] + +v_min_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00] + +v_min_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00] + +v_min_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_min_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00] + +v_min_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00] + +v_min_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00] + +v_min_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00] + +v_min_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00] + +v_min_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00] + +v_min_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48] + +v_min_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30] + +v_min_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_min_i32_e64 v5, v1, v2 +// GFX11: encoding: 
[0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] + +v_min_i32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00] + +v_min_i32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00] + +v_min_i32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00] + +v_min_i32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00] + +v_min_i32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_min_i32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00] + +v_min_i32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00] + +v_min_i32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00] + +v_min_i32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00] + +v_min_i32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00] + +v_min_i32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00] + +v_min_i32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00] + +v_min_i32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00] + +v_min_i32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_min_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] + +v_min_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00] + +v_min_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00] + +v_min_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00] + +v_min_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00] + +v_min_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: 
[0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_min_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00] + +v_min_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00] + +v_min_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00] + +v_min_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00] + +v_min_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00] + +v_min_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00] + +v_min_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00] + +v_min_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00] + +v_min_u32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_dx9_zero_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] + +v_mul_dx9_zero_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] + +v_mul_dx9_zero_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_dx9_zero_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_dx9_zero_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_dx9_zero_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_dx9_zero_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: 
[0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] + +v_mul_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] + +v_mul_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_mul_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] + +v_mul_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00] + +v_mul_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00] + +v_mul_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_mul_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48] + +v_mul_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: 
[0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30] + +v_mul_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_mul_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00] + +v_mul_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00] + +v_mul_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00] + +v_mul_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48] + +v_mul_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30] + +v_mul_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32_i24_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00] + +v_mul_hi_i32_i24_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00] + +v_mul_hi_i32_i24_e64 v5, s1, s2 +// GFX11: encoding: 
[0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32_i24_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_hi_i32_i24_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_hi_i32_i24_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_hi_i32_i24_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00] + +v_mul_hi_i32_i24_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00] + +v_mul_hi_i32_i24_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32_u24_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00] + +v_mul_hi_u32_u24_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00] + +v_mul_hi_u32_u24_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32_u24_e64 
v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_hi_u32_u24_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_hi_u32_u24_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_hi_u32_u24_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00] + +v_mul_hi_u32_u24_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00] + +v_mul_hi_u32_u24_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_i32_i24_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00] + +v_mul_i32_i24_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00] + +v_mul_i32_i24_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00] + +v_mul_i32_i24_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_i32_i24_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_i32_i24_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_i32_i24_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_i32_i24_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_i32_i24_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_i32_i24_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_i32_i24_e64 v5, null, exec_lo +// 
GFX11: encoding: [0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_i32_i24_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_i32_i24_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00] + +v_mul_i32_i24_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00] + +v_mul_i32_i24_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_mul_legacy_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] + +v_mul_legacy_f32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] + +v_mul_legacy_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] + +v_mul_legacy_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_legacy_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_legacy_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_legacy_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_legacy_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_legacy_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_legacy_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_legacy_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_legacy_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_legacy_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] + +v_mul_legacy_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] + +v_mul_legacy_f32_e64 
v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_mul_u32_u24_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00] + +v_mul_u32_u24_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00] + +v_mul_u32_u24_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00] + +v_mul_u32_u24_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00] + +v_mul_u32_u24_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00] + +v_mul_u32_u24_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_mul_u32_u24_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00] + +v_mul_u32_u24_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00] + +v_mul_u32_u24_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00] + +v_mul_u32_u24_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00] + +v_mul_u32_u24_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00] + +v_mul_u32_u24_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00] + +v_mul_u32_u24_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00] + +v_mul_u32_u24_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00] + +v_mul_u32_u24_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_or_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00] + +v_or_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00] + +v_or_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00] + +v_or_b32_e64 v5, s105, s105 +// GFX11: 
encoding: [0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00] + +v_or_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00] + +v_or_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_or_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00] + +v_or_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00] + +v_or_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00] + +v_or_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00] + +v_or_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00] + +v_or_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00] + +v_or_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00] + +v_or_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00] + +v_or_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_sub_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, v255, src_scc, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0xff,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, s105, s105, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x6a,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 +// W32: encoding: 
[0x05,0x06,0x21,0xd5,0x6b,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, ttmp15, ttmp15, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, m0, 0.5, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x7d,0xe0,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 +// W32: encoding: [0x05,0x06,0x21,0xd5,0x7f,0x82,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s105, null, exec_hi, s105 +// W32: encoding: [0x05,0x69,0x21,0xd5,0x7c,0xfe,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo +// W32: encoding: [0x05,0x6a,0x21,0xd5,0xc1,0xfa,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi +// W32: encoding: [0x05,0x6b,0x21,0xd5,0xf0,0xd4,0xac,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 +// W32: encoding: [0x05,0x7b,0x21,0xd5,0xfd,0xf8,0xec,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] +// W64: encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] +// W64: encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc +// W64: encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_sub_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] +// W64: encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_sub_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_sub_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] + +v_sub_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00] + +v_sub_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00] + +v_sub_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00] + +v_sub_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00] + +v_sub_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_sub_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00] + +v_sub_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00] + +v_sub_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00] + +v_sub_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00] + +v_sub_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00] + +v_sub_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00] + +v_sub_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48] + +v_sub_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30] + +v_sub_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_sub_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00] + +v_sub_f32_e64 v5, 
v255, v255 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00] + +v_sub_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00] + +v_sub_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00] + +v_sub_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00] + +v_sub_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_sub_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00] + +v_sub_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00] + +v_sub_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00] + +v_sub_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00] + +v_sub_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00] + +v_sub_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00] + +v_sub_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48] + +v_sub_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30] + +v_sub_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_sub_nc_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00] + +v_sub_nc_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00] + +v_sub_nc_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00] + +v_sub_nc_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00] + +v_sub_nc_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00] + +v_sub_nc_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: 
[0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_sub_nc_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00] + +v_sub_nc_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00] + +v_sub_nc_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00] + +v_sub_nc_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00] + +v_sub_nc_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00] + +v_sub_nc_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00] + +v_sub_nc_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00] + +v_sub_nc_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00] + +v_sub_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_subrev_co_ci_u32_e64 v5, s6, v1, 0xaf123456, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, v255, src_scc, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0xff,0xfb,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, s105, s105, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x69,0xd2,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, vcc_lo, v2, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x6a,0x04,0x0e,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, vcc_hi, v255, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x6b,0xfe,0x0f,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, ttmp15, 
ttmp15, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x7b,0xf6,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, m0, 0.5, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x7d,0xe0,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, exec_lo, exec_lo, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x7e,0xfc,0x0c,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s6, exec_hi, -1, s3 +// W32: encoding: [0x05,0x06,0x22,0xd5,0x7f,0x82,0x0d,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s105, null, exec_hi, s105 +// W32: encoding: [0x05,0x69,0x22,0xd5,0x7c,0xfe,0xa4,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, vcc_lo, -1, m0, vcc_lo +// W32: encoding: [0x05,0x6a,0x22,0xd5,0xc1,0xfa,0xa8,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, vcc_hi, 0.5, vcc_lo, vcc_hi +// W32: encoding: [0x05,0x6b,0x22,0xd5,0xf0,0xd4,0xac,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, ttmp15, src_scc, null, ttmp15 +// W32: encoding: [0x05,0x7b,0x22,0xd5,0xfd,0xf8,0xec,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] +// W64: encoding: 
[0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] +// W64: encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] +// W64: encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc +// W64: encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] +// W64: encoding: 
[0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_subrev_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp +// GFX11: encoding: [0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] + +v_subrev_f16_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] + +v_subrev_f16_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00] + +v_subrev_f16_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00] + +v_subrev_f16_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00] + +v_subrev_f16_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00] + +v_subrev_f16_e64 v5, vcc_hi, 0xfe0b +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] + +v_subrev_f16_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00] + +v_subrev_f16_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00] + +v_subrev_f16_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00] + +v_subrev_f16_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00] + +v_subrev_f16_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00] + +v_subrev_f16_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00] + +v_subrev_f16_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48] + +v_subrev_f16_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30] + +v_subrev_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] + +v_subrev_f32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00] + +v_subrev_f32_e64 v5, v255, v255 +// GFX11: encoding: 
[0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00] + +v_subrev_f32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00] + +v_subrev_f32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00] + +v_subrev_f32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00] + +v_subrev_f32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_subrev_f32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00] + +v_subrev_f32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00] + +v_subrev_f32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00] + +v_subrev_f32_e64 v5, |exec_hi|, null +// GFX11: encoding: [0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00] + +v_subrev_f32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00] + +v_subrev_f32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00] + +v_subrev_f32_e64 v5, 0.5, -m0 mul:2 +// GFX11: encoding: [0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48] + +v_subrev_f32_e64 v5, -src_scc, |vcc_lo| mul:4 +// GFX11: encoding: [0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30] + +v_subrev_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 +// GFX11: encoding: [0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] + +v_subrev_nc_u32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00] + +v_subrev_nc_u32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00] + +v_subrev_nc_u32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00] + +v_subrev_nc_u32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00] + +v_subrev_nc_u32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00] + +v_subrev_nc_u32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: 
[0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_subrev_nc_u32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00] + +v_subrev_nc_u32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00] + +v_subrev_nc_u32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00] + +v_subrev_nc_u32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00] + +v_subrev_nc_u32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00] + +v_subrev_nc_u32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00] + +v_subrev_nc_u32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00] + +v_subrev_nc_u32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00] + +v_subrev_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp +// GFX11: encoding: [0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_xnor_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00] + +v_xnor_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00] + +v_xnor_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00] + +v_xnor_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00] + +v_xnor_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00] + +v_xnor_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_xnor_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00] + +v_xnor_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00] + +v_xnor_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00] + +v_xnor_b32_e64 v5, exec_hi, null +// GFX11: encoding: 
[0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00] + +v_xnor_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00] + +v_xnor_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00] + +v_xnor_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00] + +v_xnor_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00] + +v_xnor_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: [0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_xor_b32_e64 v5, v1, v2 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00] + +v_xor_b32_e64 v5, v255, v255 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00] + +v_xor_b32_e64 v5, s1, s2 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00] + +v_xor_b32_e64 v5, s105, s105 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00] + +v_xor_b32_e64 v5, vcc_lo, ttmp15 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00] + +v_xor_b32_e64 v5, vcc_hi, 0xaf123456 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_xor_b32_e64 v5, ttmp15, src_scc +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00] + +v_xor_b32_e64 v5, m0, 0.5 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00] + +v_xor_b32_e64 v5, exec_lo, -1 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00] + +v_xor_b32_e64 v5, exec_hi, null +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00] + +v_xor_b32_e64 v5, null, exec_lo +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00] + +v_xor_b32_e64 v5, -1, exec_hi +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00] + +v_xor_b32_e64 v5, 0.5, m0 +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00] + +v_xor_b32_e64 v5, src_scc, vcc_lo +// GFX11: encoding: [0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00] + +v_xor_b32_e64 v255, 0xaf123456, vcc_hi +// GFX11: encoding: 
[0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AsmParser/layout-interdependency.s b/llvm/test/MC/AsmParser/layout-interdependency.s index ca766573426a2..f26149ced766f 100644 --- a/llvm/test/MC/AsmParser/layout-interdependency.s +++ b/llvm/test/MC/AsmParser/layout-interdependency.s @@ -1,5 +1,5 @@ # RUN: not llvm-mc --filetype=obj %s -o /dev/null 2>&1 | FileCheck %s -# REQUIRES: default_triple +# REQUIRES: object-emission fct_end: diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt index 66ce6b8b94fab..0785ba2ea2eb6 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3.txt @@ -16044,6 +16044,9 @@ # GFX10: v_permlane16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00] 0x05,0x00,0x77,0xd7,0x01,0xcf,0x0c,0x00 +# GFX10: v_permlane16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x77,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf + # GFX10: v_permlane16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03] 0x05,0x00,0x77,0xd7,0x01,0x05,0x04,0x03 @@ -16071,6 +16074,9 @@ # GFX10: v_permlane16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x00,0x77,0xd7,0x01,0x05,0x0c,0x00 +# GFX10: v_permlane16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x77,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf + # GFX10: v_permlane16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x10,0x77,0xd7,0x01,0x05,0x0c,0x00 @@ -16149,6 +16155,9 @@ # GFX10: v_permlanex16_b32 v5, v1, s103, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00] 0x05,0x00,0x78,0xd7,0x01,0xcf,0x0c,0x00 +# GFX10: v_permlanex16_b32 v5, v1, 0xaf123456, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf] 
+0x05,0x00,0x78,0xd7,0x01,0xff,0x0d,0x00,0x56,0x34,0x12,0xaf + # GFX10: v_permlanex16_b32 v5, v1, s2, -1 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03] 0x05,0x00,0x78,0xd7,0x01,0x05,0x04,0x03 @@ -16176,6 +16185,9 @@ # GFX10: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x00,0x78,0xd7,0x01,0x05,0x0c,0x00 +# GFX10: v_permlanex16_b32 v5, v1, s2, 0xaf123456 ; encoding: [0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x78,0xd7,0x01,0x05,0xfc,0x03,0x56,0x34,0x12,0xaf + # GFX10: v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00] 0x05,0x10,0x78,0xd7,0x01,0x05,0x0c,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 956bb2d332669..67ecd5da929bf 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -1,5 +1,5 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s # GFX11: v_add3_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x55,0xd6,0x01,0x05,0x0e,0x00 @@ -46,61 +46,6 @@ # GFX11: v_add3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x55,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# W32: v_add_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: 
[0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf - -# W32: v_add_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] -0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] -0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] -0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] -0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] -0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] -0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] -0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00 - -# 
W32: v_add_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] -0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] -# W64: v_add_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] -0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00 - -# W32: v_add_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] -# W64: v_add_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] -0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01 - -# W32: v_add_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] -# W64: v_add_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] -0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01 - -# W32: v_add_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] -# W64: v_add_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] -0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01 - -# GFX11: v_add_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - # W32: v_add_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00] # W64: v_add_co_u32 v5, s[12:13], v1, v2 ; encoding: [0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00] 0x05,0x0c,0x00,0xd7,0x01,0x05,0x02,0x00 @@ -160,96 +105,6 @@ # GFX11: v_add_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0xfc,0x00,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# 
GFX11: v_add_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_add_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_add_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_add_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_add_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_add_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_add_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_add_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_add_f16_e64 
v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_add_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_add_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_add_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_add_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_add_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_add_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_add_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48] 
-0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_add_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_add_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_add_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x27,0xd7,0x01,0x05,0x02,0x00 @@ -466,51 +321,6 @@ # GFX11: v_add_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x03,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_add_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_add_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_add_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_add_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_add_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_add_nc_u32_e64 v5, exec_lo, -1 ; encoding: 
[0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_add_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_add_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_alignbit_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x16,0xd6,0x01,0x05,0x0e,0x00 @@ -646,51 +456,6 @@ # GFX11: v_and_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x62,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_and_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_and_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_and_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: 
v_and_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_and_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_and_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_and_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_and_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_and_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_and_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_and_or_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x57,0xd6,0x01,0x05,0x0e,0x00 @@ -781,51 +546,6 @@ # GFX11: v_ashrrev_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x3a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_ashrrev_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_ashrrev_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_ashrrev_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 
-0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_ashrrev_i64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x3e,0xd7,0x01,0xd5,0x00,0x00 @@ -1078,267 +798,6 @@ # GFX11: v_bfm_b32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x00,0x1d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_bfrev_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, null ; encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: 
v_bfrev_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_bfrev_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_ceil_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, null ; encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00] 
-0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_ceil_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, null ; encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, -1 ; encoding: 
[0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_ceil_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_ceil_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_ceil_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_ceil_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: 
[0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_cls_i32_e64 v5, v1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, v255 ; encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, s1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, s105 ; encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, m0 ; encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, null ; encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, -1 ; encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 
v5, src_scc ; encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_clz_i32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, s1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, null ; encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] 
-0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - # W32: v_cndmask_b16 v5, v1, src_scc, s6 ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] # W64: v_cndmask_b16 v5, v1, src_scc, s[6:7] ; encoding: [0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00] 0x05,0x00,0x5d,0xd6,0x01,0xfb,0x19,0x00 @@ -1390,196 +849,6 @@ # GFX11: v_cndmask_b16 v255, -|0xfe0b|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0xff,0x03,0x5d,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# W32: v_cndmask_b32_e64 v5, v1, 0xaf123456, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -# W64: v_cndmask_b32_e64 v5, v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf - -# W32: v_cndmask_b32_e64 v5, v255, src_scc, s6 ; encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] -# W64: v_cndmask_b32_e64 v5, v255, src_scc, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] -0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00 - -# W32: v_cndmask_b32_e64 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] -# W64: v_cndmask_b32_e64 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] -0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00 - -# W32: v_cndmask_b32_e64 v5, vcc_lo, v2, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] -# W64: v_cndmask_b32_e64 v5, vcc_lo, v2, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] -0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00 - -# W32: v_cndmask_b32_e64 v5, vcc_hi, v255, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] -# W64: v_cndmask_b32_e64 v5, 
vcc_hi, v255, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] -0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00 - -# W32: v_cndmask_b32_e64 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] -# W64: v_cndmask_b32_e64 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] -0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00 - -# W32: v_cndmask_b32_e64 v5, m0, 0.5, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] -# W64: v_cndmask_b32_e64 v5, m0, 0.5, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] -0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00 - -# W32: v_cndmask_b32_e64 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] -# W64: v_cndmask_b32_e64 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] -0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00 - -# W32: v_cndmask_b32_e64 v5, exec_hi, -1, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] -# W64: v_cndmask_b32_e64 v5, exec_hi, -1, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] -0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00 - -# W32: v_cndmask_b32_e64 v5, null, exec_hi, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] -# W64: v_cndmask_b32_e64 v5, null, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] -0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00 - -# W32: v_cndmask_b32_e64 v5, -1, m0, s104 ; encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] -# W64: v_cndmask_b32_e64 v5, -1, m0, s[104:105] ; encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] -0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01 - -# W32: v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] -# W64: v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc ; encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] -0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41 - -# W32: v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] -# W64: v_cndmask_b32_e64 
v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] -0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21 - -# GFX11: v_cndmask_b32_e64 v255, -|0xaf123456|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf] -0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf - -# GFX11: v_cos_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, null ; encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08] 
-0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_cos_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, null ; encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64 v5, 0.5 mul:2 ; encoding: 
[0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_cos_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_ctz_i32_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, null ; encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] 
-0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_cubeid_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x0c,0xd6,0x01,0x05,0x0e,0x00 @@ -1760,3113 +1029,1106 @@ # GFX11: v_cubetc_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] 0xff,0x83,0x0e,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cvt_f16_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, s105, s105 ; encoding: 
[0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] 
+0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_i16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_i16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f16_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, v255, v255 ; encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, s1, s2 ; encoding: 
[0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, s105, s105 ; encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, null ; encoding: [0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, exec_hi, null ; encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] 
+0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, null, exec_lo ; encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, 0x3800 mul:2 -0x05,0x00,0xd1,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_i16_i32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_i16_i32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_cvt_f16_i16_e64 v255, 0xfe0b clamp div:2 ; encoding: [0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] -0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f16_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00 
-# GFX11: v_cvt_f16_u16_e64 v5, s105 ; encoding: [0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, m0 ; encoding: [0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: 
v_cvt_f16_u16_e64 v5, null ; encoding: [0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, -1 ; encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, 0x3800 mul:2 -0x05,0x00,0xd0,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f16_u16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f16_u16_e64 v255, 0xfe0b clamp div:2 ; encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] -0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] 
+0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, s1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, s105 ; encoding: [0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, m0 ; encoding: [0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] 
+0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, null ; encoding: [0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, -1 ; encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f32_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; 
encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_f64_e64 v5, null ; encoding: [0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, ttmp15, src_scc ; 
encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_u16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_f64_e64 v5, -|src_scc| mul:4 ; encoding: [0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30 +# GFX11: v_cvt_pk_u16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_f64_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_u16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, v1 ; encoding: [0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, v255 ; encoding: [0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f32_i32_e64 v5, s1 ; encoding: [0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| ; encoding: 
[0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_i32_e64 v5, s105 ; encoding: [0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, v255, v255 ; encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, s1, s2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, m0 ; encoding: [0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, s105, s105 ; encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 ; encoding: 
[0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_i32_e64 v5, null ; encoding: [0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, -1 ; encoding: [0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_u16_u32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_i32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_u16_u32 v5, exec_hi, null ; encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_i32_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_u16_u32 v5, null, exec_lo ; encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, 0.5, m0 ; encoding: 
[0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, s1 ; encoding: [0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_f32_u32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 ; encoding: 
[0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_f32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_u32_e64 v5, null ; encoding: [0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_f32_u32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_f32_u32_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte0_e64 v5, v1 ; encoding: 
[0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_f32_ubyte0_e64 v5, v255 ; encoding: [0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_cvt_f32_ubyte0_e64 v5, s1 ; encoding: [0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_cvt_f32_ubyte0_e64 v5, s105 ; encoding: [0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null ; encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] +0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte0_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, m0 ; encoding: 
[0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, exec_lo ; encoding: [0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, exec_hi ; encoding: [0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte0_e64 v5, null ; encoding: [0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, -1 ; encoding: [0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pknorm_i16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pknorm_i16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_ubyte0_e64 v255, 
0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pknorm_i16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, v1 ; encoding: [0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, v255 ; encoding: [0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f32_ubyte1_e64 v5, s1 ; encoding: [0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_ubyte1_e64 v5, s105 ; encoding: [0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte1_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] 
+0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, m0 ; encoding: [0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, exec_lo ; encoding: [0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, exec_hi ; encoding: [0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte1_e64 v5, null ; encoding: [0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, -1 ; encoding: [0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_cvt_pknorm_u16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] 
+0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_cvt_pknorm_u16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_f32_ubyte1_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pknorm_u16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_f32_ubyte2_e64 v5, v1 ; encoding: [0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte2_e64 v5, v255 ; encoding: [0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40 -# GFX11: v_cvt_f32_ubyte2_e64 v5, s1 ; encoding: [0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20 -# GFX11: v_cvt_f32_ubyte2_e64 v5, s105 ; encoding: [0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte2_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, 
v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_f32_ubyte2_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, m0 ; encoding: [0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte2_e64 v5, exec_lo ; encoding: [0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, exec_hi ; encoding: [0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_f32_ubyte2_e64 v5, null ; encoding: [0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte2_e64 v5, -1 ; encoding: [0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: 
v_cvt_f32_ubyte2_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_f32_ubyte2_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_f32_ubyte3_e64 v5, v1 ; encoding: [0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte3_e64 v5, v255 ; encoding: [0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_f32_ubyte3_e64 v5, s1 ; encoding: [0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_cvt_f32_ubyte3_e64 v5, s105 ; encoding: [0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 
v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_cvt_f32_ubyte3_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_f32_ubyte3_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_f32_ubyte3_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, m0 ; encoding: [0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, exec_lo ; encoding: [0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, exec_hi ; encoding: [0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_f32_ubyte3_e64 v5, null ; encoding: [0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00] 
-0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f32_ubyte3_e64 v5, -1 ; encoding: [0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_f32_ubyte3_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_div_fixup_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_f32_ubyte3_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_f64_f32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_f32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: 
[0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_f64_f32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_cvt_f64_f32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_cvt_f64_f32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_f32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04 -# GFX11: v_cvt_f64_f32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] +0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00 -# GFX11: v_cvt_f64_f32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] +0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07 -# GFX11: v_cvt_f64_f32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00] 
-0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| ; encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] +0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1 -# GFX11: v_cvt_f64_f32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] +0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1 -# GFX11: v_cvt_f64_f32_e64 v[5:6], null ; encoding: [0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null ; encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_f32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| ; encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] +0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1 -# GFX11: v_cvt_f64_f32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_div_fixup_f64 v[5:6], null, 0.5, vcc ; encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] +0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01 -# GFX11: v_cvt_f64_f32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_f32_e64 v[254:255], -|0xaf123456| clamp div:2 ; encoding: [0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_div_fixup_f64 v[5:6], 
0.5, null, -|src_scc| mul:2 ; encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] +0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b -# GFX11: v_cvt_f64_i32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 ; encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] +0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73 -# GFX11: v_cvt_f64_i32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_i32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_i32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, v255, src_scc, src_scc ; encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03] +0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03 -# GFX11: v_cvt_f64_i32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01] +0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01 -# GFX11: v_cvt_f64_i32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] +0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04 -# GFX11: v_cvt_f64_i32_e64 v[5:6], ttmp15 ; encoding: 
[0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi ; encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] +0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01 -# GFX11: v_cvt_f64_i32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 ; encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] +0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61 -# GFX11: v_cvt_f64_i32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, m0, 0.5, v255 ; encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] +0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07 -# GFX11: v_cvt_f64_i32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| ; encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] +0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1 -# GFX11: v_cvt_f64_i32_e64 v[5:6], null ; encoding: [0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| ; encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] +0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1 -# GFX11: v_cvt_f64_i32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fmas_f32 v5, null, m0, -|m0| ; encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] +0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81 -# GFX11: v_cvt_f64_i32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo| ; encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] +0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1 -# GFX11: v_cvt_f64_i32_e64 v[5:6], src_scc mul:4 ; 
encoding: [0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 ; encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] +0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b -# GFX11: v_cvt_f64_i32_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_div_fmas_f32 v5, src_scc, -1, -1 mul:4 ; encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13] +0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13 -# GFX11: v_cvt_f64_u32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2 ; encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf] +0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_u32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_u32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04] +0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04 -# GFX11: v_cvt_f64_u32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105] ; encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01] +0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01 -# GFX11: v_cvt_f64_u32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: 
v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]| ; encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7] +0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7 -# GFX11: v_cvt_f64_u32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1] +0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1 -# GFX11: v_cvt_f64_u32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null ; encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61] +0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61 -# GFX11: v_cvt_f64_u32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], null, 0.5, -src_scc ; encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83] +0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83 -# GFX11: v_cvt_f64_u32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -1, -exec, |exec| ; encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41] +0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41 -# GFX11: v_cvt_f64_u32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2 ; encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9] +0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9 -# GFX11: v_cvt_f64_u32_e64 v[5:6], null ; encoding: [0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4 ; encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33] +0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33 -# GFX11: v_cvt_f64_u32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00] 
-0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2 ; encoding: [0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_f64_u32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08 +# W32: v_div_scale_f32 v5, vcc_lo, v1, v2, s3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] +# W64: v_div_scale_f32 v5, vcc, v1, v2, s3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_f64_u32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10 +# W32: v_div_scale_f32 v5, vcc_lo, v255, s2, s105 ; encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] +# W64: v_div_scale_f32 v5, vcc, v255, s2, s105 ; encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_f64_u32_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# W32: v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] +# W64: v_div_scale_f32 v5, vcc, s1, v255, exec_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_floor_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] +# W64: v_div_scale_f32 v5, vcc, s105, s105, exec_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_floor_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] 
-0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] +# W64: v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_floor_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_floor_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] +# W64: v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_floor_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] +# W64: v_div_scale_f32 v5, vcc, m0, 0.5, m0 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_floor_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] +# W64: v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_floor_i32_f32_e64 v5, ttmp15 ; 
encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64: v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_floor_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456) ; encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456) ; encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_floor_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc ; encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64: v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc ; encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_floor_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] +# W64: v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_cvt_floor_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00 +# W32: v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4 ; encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] +# W64: v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4 ; encoding: 
[0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_cvt_floor_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00 +# W32: v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2 ; encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2 ; encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_floor_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] +# W64: v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04 -# GFX11: v_cvt_floor_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] +# W64: v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] +0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00 -# GFX11: v_cvt_floor_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf +# W32: v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] +# W64: v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] +0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07 -# GFX11: v_cvt_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00] 
-0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105] ; encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] +# W64: v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105] ; encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] +0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1 -# GFX11: v_cvt_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15] ; encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] +# W64: v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15] ; encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] +0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1 -# GFX11: v_cvt_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null ; encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null ; encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec ; encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] +# W64: v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec ; encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] +0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1 -# GFX11: v_cvt_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc ; encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] +# W64: v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc ; encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] 
+0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01 -# GFX11: v_cvt_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456 ; encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456 ; encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2 ; encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] +# W64: v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2 ; encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] +0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b -# GFX11: v_cvt_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4 ; encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] +# W64: v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4 ; encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] +0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73 -# GFX11: v_cvt_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00 +# W32: v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +# W64: v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, v1, v2, s3 ; 
encoding: [0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] +0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01 -# GFX11: v_cvt_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] +0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04 -# GFX11: v_cvt_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] +0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01 -# GFX11: v_cvt_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] +0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07 -# GFX11: v_cvt_i16_f16_e64 v255, -|0xfe0b| clamp ; encoding: [0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00 +# GFX11: 
v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] +0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81 -# GFX11: v_cvt_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] +0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] +0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1 -# GFX11: v_cvt_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] +0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43 -# GFX11: v_cvt_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] +0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1 -# GFX11: v_cvt_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_cvt_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f32_e64 v5, 
exec_lo ; encoding: [0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f32_e64 v255, -|0xaf123456| clamp ; encoding: [0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf +# GFX11: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1 
-# GFX11: v_cvt_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_i32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_i32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_i32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_i32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_cvt_i32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] 
+0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_cvt_i32_f64_e64 v5, null ; encoding: [0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_i32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_i32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_i32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20] -0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20 +# GFX11: v_fma_dx9_zero_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_i32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_fma_dx9_zero_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_i32_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_i32_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, vcc_hi, 
0xaf123456, v255 ; encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i32_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_i32_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_i32_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_i32_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_i32_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i32_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_i32_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00] 
-0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_cvt_i32_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_cvt_i32_i16_e64 v5, null ; encoding: [0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_i32_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_i32_i16_e64 v5, 0x3800 -0x05,0x00,0xea,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_i32_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_i32_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 +# GFX11: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] 
-0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: 
v_cvt_nearest_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] +0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] +0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, s1, v255, exec_hi ; encoding: 
[0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf +# GFX11: v_fma_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_norm_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_fma_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_norm_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_fma_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_cvt_norm_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_norm_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_norm_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: 
v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_cvt_norm_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_cvt_norm_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_cvt_norm_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_cvt_norm_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] +0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04 -# GFX11: v_cvt_norm_i16_f16_e64 v5, 0.5 ; encoding: 
[0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00] +0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00 -# GFX11: v_cvt_norm_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07] +0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07 -# GFX11: v_cvt_norm_i16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| ; encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1] +0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1 -# GFX11: v_cvt_norm_u16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1] +0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1 -# GFX11: v_cvt_norm_u16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null ; encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] +0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_u16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec| ; encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1] +0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1 -# GFX11: v_cvt_norm_u16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], null, 0.5, vcc ; encoding: 
[0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01] +0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01 -# GFX11: v_cvt_norm_u16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_u16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2 ; encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b] +0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b -# GFX11: v_cvt_norm_u16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 ; encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73] +0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73 -# GFX11: v_cvt_norm_u16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] +0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_u16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00] +0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, null ; encoding: [0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00] 
-0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00 -# GFX11: v_cvt_norm_u16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_norm_u16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 +# GFX11: v_ldexp_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, v1 ; encoding: [0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_ldexp_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, v255 ; encoding: [0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_ldexp_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, s1 ; encoding: [0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00] 
-0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, exec_hi, null ; encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, s105 ; encoding: [0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08] +0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08 -# GFX11: v_cvt_off_f32_i4_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10] +0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10 -# GFX11: v_cvt_off_f32_i4_e64 v5, m0 ; encoding: [0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_off_f32_i4_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00] 
-0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[1:2], v255 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, null ; encoding: [0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[1:2], s2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, -1 ; encoding: [0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], v[1:2], s105 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_ldexp_f64 v[5:6], v[254:255], ttmp15 ; encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00] +0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_ldexp_f64 v[5:6], s[2:3], vcc_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00 -# GFX11: v_cvt_off_f32_i4_e64 v255, 0x4f clamp div:2 ; encoding: [0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00] -0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], s[104:105], vcc_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x06,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_ldexp_f64 v[5:6], vcc, m0 ; encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00] 
-0x05,0x00,0x06,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x06,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], exec, exec_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x06,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], null, null ; encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00] +0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x06,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_ldexp_f64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00] +0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x06,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2 ; encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08] +0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08 -# GFX11: v_cvt_pk_i16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x06,0xd7,0x7b,0xfa,0x01,0x00 +# GFX11: v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4 ; encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30] +0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30 -# GFX11: v_cvt_pk_i16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x06,0xd7,0x7d,0xe0,0x01,0x00 +# GFX11: v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_i16_f32 v5, 
exec_lo, -1 ; encoding: [0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x06,0xd7,0x7e,0x82,0x01,0x00 +# GFX11: v_lerp_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_cvt_pk_i16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x06,0xd7,0x7f,0xf8,0x00,0x00 +# GFX11: v_lerp_u8 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_cvt_pk_i16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x06,0xd7,0x7c,0xfc,0x00,0x00 +# GFX11: v_lerp_u8 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_cvt_pk_i16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x06,0xd7,0xc1,0xfe,0x00,0x00 +# GFX11: v_lerp_u8 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_cvt_pk_i16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x06,0xd7,0xf0,0xfa,0x00,0x40 +# GFX11: v_lerp_u8 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_cvt_pk_i16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x06,0xd7,0xfd,0xd4,0x00,0x20 +# GFX11: v_lerp_u8 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_i16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x03,0x06,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf +# GFX11: v_lerp_u8 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01] 
+0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, v1, v2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x24,0xd7,0x01,0x05,0x02,0x00 +# GFX11: v_lerp_u8 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, v255, v255 ; encoding: [0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x24,0xd7,0xff,0xff,0x03,0x00 +# GFX11: v_lerp_u8 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, s1, s2 ; encoding: [0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x24,0xd7,0x01,0x04,0x00,0x00 +# GFX11: v_lerp_u8 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_cvt_pk_i16_i32 v5, s105, s105 ; encoding: [0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x24,0xd7,0x69,0xd2,0x00,0x00 +# GFX11: v_lerp_u8 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_i16_i32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x24,0xd7,0x6a,0xf6,0x00,0x00 +# GFX11: v_lerp_u8 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_cvt_pk_i16_i32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x24,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_i16_i32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x24,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pk_i16_i32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x24,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pk_i16_i32 v5, exec_lo, -1 ; encoding: 
[0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x24,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pk_i16_i32 v5, exec_hi, null ; encoding: [0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x24,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pk_i16_i32 v5, null, exec_lo ; encoding: [0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x24,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_cvt_pk_i16_i32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x24,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_i16_i32 v5, 0.5, m0 ; encoding: [0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x24,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_cvt_pk_i16_i32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x24,0xd7,0xfd,0xd4,0x00,0x00 - -# GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x12,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x12,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x12,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00] 
-0x05,0x00,0x12,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x12,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x12,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x12,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x12,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40 - -# GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x0a,0x12,0xd7,0xfd,0xd4,0x00,0x20 - -# GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x13,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00] 
-0x05,0x00,0x13,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x13,0xd7,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x13,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x13,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x13,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x13,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x13,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40 - -# GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x0a,0x13,0xd7,0xfd,0xd4,0x00,0x20 - -# GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, s1, s2 ; encoding: 
[0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp ; encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf - -# GFX11: 
v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x07,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x07,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x07,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x07,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x07,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_u16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x07,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x07,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x07,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x07,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x07,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x07,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_u16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x07,0xd7,0xf0,0xfa,0x00,0x40 - -# GFX11: v_cvt_pk_u16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x07,0xd7,0xfd,0xd4,0x00,0x20 - 
-# GFX11: v_cvt_pk_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x03,0x07,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_u16_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x23,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, v255, v255 ; encoding: [0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x23,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, s1, s2 ; encoding: [0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x23,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, s105, s105 ; encoding: [0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x23,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x23,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x23,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_u16_u32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x23,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x23,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x23,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, exec_hi, null ; encoding: [0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x23,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, null, exec_lo ; encoding: [0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x23,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x23,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, 0.5, m0 ; encoding: 
[0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x23,0xd7,0xf0,0xfa,0x00,0x00 - -# GFX11: v_cvt_pk_u16_u32 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x23,0xd7,0xfd,0xd4,0x00,0x00 - -# GFX11: v_cvt_pk_u16_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x23,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_u8_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x26,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_cvt_pk_u8_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x26,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_cvt_pk_u8_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x26,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_cvt_pk_u8_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x26,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_cvt_pk_u8_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x26,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_cvt_pk_u8_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x26,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_u8_f32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x26,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_cvt_pk_u8_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x26,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_cvt_pk_u8_f32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x26,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_cvt_pk_u8_f32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x26,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_cvt_pk_u8_f32 v5, null, exec_lo, 0xaf123456 ; encoding: 
[0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x26,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pk_u8_f32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x26,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_cvt_pk_u8_f32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x26,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_cvt_pk_u8_f32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x26,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_cvt_pk_u8_f32 v255, -|0xaf123456|, vcc_hi, null ; encoding: [0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf] -0xff,0x01,0x26,0xd6,0xff,0xd6,0xf0,0x21,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pknorm_i16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x21,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x21,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x21,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x21,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x21,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x21,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pknorm_i16_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x21,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x21,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, exec_lo, -1 ; 
encoding: [0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x21,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x21,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x21,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x21,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pknorm_i16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x21,0xd7,0xf0,0xfa,0x00,0x40 - -# GFX11: v_cvt_pknorm_i16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x21,0xd7,0xfd,0xd4,0x00,0x20 - -# GFX11: v_cvt_pknorm_i16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x03,0x21,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pknorm_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x22,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x22,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x22,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x22,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x22,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x22,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_pknorm_u16_f32 v5, ttmp15, src_scc ; encoding: 
[0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x22,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x22,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x22,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x22,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x22,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x22,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_cvt_pknorm_u16_f32 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40] -0x05,0x00,0x22,0xd7,0xf0,0xfa,0x00,0x40 - -# GFX11: v_cvt_pknorm_u16_f32 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20] -0x05,0x02,0x22,0xd7,0xfd,0xd4,0x00,0x20 - -# GFX11: v_cvt_pknorm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_u16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, 
vcc_hi ; encoding: [0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, null ; encoding: [0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_cvt_u16_f16_e64 v255, -|0xfe0b| clamp ; encoding: [0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00] 
-0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, null ; encoding: [0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64 v255, -|0xaf123456| clamp ; encoding: [0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_u32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00 - 
-# GFX11: v_cvt_u32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, null ; encoding: [0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20] -0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20 - -# GFX11: v_cvt_u32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, s105 ; encoding: [0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, ttmp15 ; encoding: 
[0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, m0 ; encoding: [0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, null ; encoding: [0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, -1 ; encoding: [0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, 0x3800 -0x05,0x00,0xeb,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v5, src_scc ; encoding: [0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 
-0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 - -# GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43 - -# GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_div_fixup_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x27,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_div_fixup_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01] 
-0x05,0x00,0x27,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_div_fixup_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x27,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_div_fixup_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x27,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_div_fixup_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x27,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x27,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_div_fixup_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x27,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_div_fixup_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x27,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_div_fixup_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x27,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_div_fixup_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x27,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x27,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_div_fixup_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x27,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_div_fixup_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x27,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_div_fixup_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x27,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# 
GFX11: v_div_fixup_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04] -0x05,0x00,0x28,0xd6,0x01,0x05,0x0e,0x04 - -# GFX11: v_div_fixup_f64 v[5:6], v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00] -0x05,0x00,0x28,0xd6,0xfe,0xfd,0x1b,0x00 - -# GFX11: v_div_fixup_f64 v[5:6], s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07] -0x05,0x00,0x28,0xd6,0x02,0x08,0xf8,0x07 - -# GFX11: v_div_fixup_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| ; encoding: [0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1] -0x05,0x05,0x28,0xd6,0x68,0xd0,0xa0,0xa1 - -# GFX11: v_div_fixup_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1] -0x05,0x06,0x28,0xd6,0x6a,0xf4,0xe8,0xc1 - -# GFX11: v_div_fixup_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null ; encoding: [0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -0x05,0x01,0x28,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f64 v[5:6], -|exec|, -|src_scc|, -|exec| ; encoding: [0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1] -0x05,0x07,0x28,0xd6,0x7e,0xfa,0xf9,0xe1 - -# GFX11: v_div_fixup_f64 v[5:6], null, 0.5, vcc ; encoding: [0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01] -0x05,0x00,0x28,0xd6,0x7c,0xe0,0xa9,0x01 - -# GFX11: v_div_fixup_f64 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x28,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fixup_f64 v[5:6], 0.5, null, -|src_scc| mul:2 ; encoding: [0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b] -0x05,0x04,0x28,0xd6,0xf0,0xf8,0xf4,0x8b - -# GFX11: v_div_fixup_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 ; encoding: [0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73] -0x05,0x03,0x28,0xd6,0xfd,0xfc,0xc0,0x73 - -# GFX11: v_div_fixup_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] 
-0xfe,0x82,0x28,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fmas_f32 v5, v1, 0xaf123456, 0xaf123456 ; encoding: [0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x37,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fmas_f32 v5, v255, src_scc, src_scc ; encoding: [0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03] -0x05,0x00,0x37,0xd6,0xff,0xfb,0xf5,0x03 - -# GFX11: v_div_fmas_f32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01] -0x05,0x00,0x37,0xd6,0x69,0xd2,0xa4,0x01 - -# GFX11: v_div_fmas_f32 v5, vcc_lo, v2, v3 ; encoding: [0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04] -0x05,0x00,0x37,0xd6,0x6a,0x04,0x0e,0x04 - -# GFX11: v_div_fmas_f32 v5, vcc_hi, v255, vcc_hi ; encoding: [0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01] -0x05,0x00,0x37,0xd6,0x6b,0xfe,0xaf,0x01 - -# GFX11: v_div_fmas_f32 v5, -|ttmp15|, -|ttmp15|, ttmp15 ; encoding: [0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61] -0x05,0x03,0x37,0xd6,0x7b,0xf6,0xec,0x61 - -# GFX11: v_div_fmas_f32 v5, m0, 0.5, v255 ; encoding: [0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07] -0x05,0x00,0x37,0xd6,0x7d,0xe0,0xfd,0x07 - -# GFX11: v_div_fmas_f32 v5, -|exec_lo|, exec_lo, -|exec_lo| ; encoding: [0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1] -0x05,0x05,0x37,0xd6,0x7e,0xfc,0xf8,0xa1 - -# GFX11: v_div_fmas_f32 v5, -|exec_hi|, -|exec_hi|, -|exec_hi| ; encoding: [0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1] -0x05,0x07,0x37,0xd6,0x7f,0xfe,0xfc,0xe1 - -# GFX11: v_div_fmas_f32 v5, null, m0, -|m0| ; encoding: [0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81] -0x05,0x04,0x37,0xd6,0x7c,0xfa,0xf4,0x81 - -# GFX11: v_div_fmas_f32 v5, -1, -|vcc_lo|, -|vcc_lo| ; encoding: [0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1] -0x05,0x06,0x37,0xd6,0xc1,0xd4,0xa8,0xc1 - -# GFX11: v_div_fmas_f32 v5, 0.5, -|vcc_hi|, 0.5 mul:2 ; encoding: [0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b] -0x05,0x02,0x37,0xd6,0xf0,0xd6,0xc0,0x4b - -# GFX11: v_div_fmas_f32 v5, src_scc, -1, -1 mul:4 ; encoding: [0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13] 
-0x05,0x00,0x37,0xd6,0xfd,0x82,0x05,0x13 - -# GFX11: v_div_fmas_f32 v255, -|0xaf123456|, null, null clamp div:2 ; encoding: [0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf] -0xff,0x81,0x37,0xd6,0xff,0xf8,0xf0,0x39,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fmas_f64 v[5:6], v[1:2], 0xaf123456, 0xaf123456 ; encoding: [0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x38,0xd6,0x01,0xff,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_div_fmas_f64 v[5:6], v[254:255], src_scc, v[3:4] ; encoding: [0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04] -0x05,0x00,0x38,0xd6,0xfe,0xfb,0x0d,0x04 - -# GFX11: v_div_fmas_f64 v[5:6], s[104:105], |s[104:105]|, s[104:105] ; encoding: [0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01] -0x05,0x02,0x38,0xd6,0x68,0xd0,0xa0,0x01 - -# GFX11: v_div_fmas_f64 v[5:6], -|vcc|, v[2:3], -|v[254:255]| ; encoding: [0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7] -0x05,0x05,0x38,0xd6,0x6a,0x04,0xfa,0xa7 - -# GFX11: v_div_fmas_f64 v[5:6], -|ttmp[14:15]|, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1] -0x05,0x07,0x38,0xd6,0x7a,0xf4,0xe8,0xe1 - -# GFX11: v_div_fmas_f64 v[5:6], -|exec|, -|v[254:255]|, null ; encoding: [0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61] -0x05,0x03,0x38,0xd6,0x7e,0xfc,0xf3,0x61 - -# GFX11: v_div_fmas_f64 v[5:6], null, 0.5, -src_scc ; encoding: [0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83] -0x05,0x00,0x38,0xd6,0x7c,0xe0,0xf5,0x83 - -# GFX11: v_div_fmas_f64 v[5:6], -1, -exec, |exec| ; encoding: [0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41] -0x05,0x04,0x38,0xd6,0xc1,0xfc,0xf8,0x41 - -# GFX11: v_div_fmas_f64 v[5:6], 0.5, -|vcc|, -|vcc| mul:2 ; encoding: [0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9] -0x05,0x06,0x38,0xd6,0xf0,0xd4,0xa8,0xc9 - -# GFX11: v_div_fmas_f64 v[5:6], -|src_scc|, -1, 0.5 mul:4 ; encoding: [0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33] -0x05,0x01,0x38,0xd6,0xfd,0x82,0xc1,0x33 - -# GFX11: v_div_fmas_f64 v[254:255], 0xaf123456, null, -1 clamp div:2 ; encoding: 
[0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x38,0xd6,0xff,0xf8,0x04,0x1b,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f32 v5, vcc_lo, v1, v2, s3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] -# W64: v_div_scale_f32 v5, vcc, v1, v2, s3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x6a,0xfc,0xd6,0x01,0x05,0x0e,0x00 - -# W32: v_div_scale_f32 v5, vcc_lo, v255, s2, s105 ; encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] -# W64: v_div_scale_f32 v5, vcc, v255, s2, s105 ; encoding: [0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x6a,0xfc,0xd6,0xff,0x05,0xa4,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, s1, v255, exec_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] -# W64: v_div_scale_f32 v5, vcc, s1, v255, exec_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x6a,0xfc,0xd6,0x01,0xfe,0xff,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, s105, s105, exec_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] -# W64: v_div_scale_f32 v5, vcc, s105, s105, exec_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x6a,0xfc,0xd6,0x69,0xd2,0xf8,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] -# W64: v_div_scale_f32 v5, vcc, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x6a,0xfc,0xd6,0x6a,0xf6,0x0c,0x04 - -# W32: v_div_scale_f32 v5, vcc_lo, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f32 v5, vcc, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfc,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f32 v5, vcc_lo, -ttmp15, -src_scc, -ttmp15 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] -# W64: v_div_scale_f32 v5, vcc, -ttmp15, -src_scc, -ttmp15 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x6a,0xfc,0xd6,0x7b,0xfa,0xed,0xe1 - -# 
W32: v_div_scale_f32 v5, vcc_lo, m0, 0.5, m0 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] -# W64: v_div_scale_f32 v5, vcc, m0, 0.5, m0 ; encoding: [0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x6a,0xfc,0xd6,0x7d,0xe0,0xf5,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, exec_lo, -1, vcc_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] -# W64: v_div_scale_f32 v5, vcc, exec_lo, -1, vcc_hi ; encoding: [0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x6a,0xfc,0xd6,0x7e,0x82,0xad,0x01 - -# W32: v_div_scale_f32 v5, vcc_lo, -exec_hi, null, -vcc_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] -# W64: v_div_scale_f32 v5, vcc, -exec_hi, null, -vcc_lo ; encoding: [0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x6a,0xfc,0xd6,0x7f,0xf8,0xa8,0xa1 - -# W32: v_div_scale_f32 v5, vcc_lo, null, exec_lo, neg(0xaf123456) ; encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f32 v5, vcc, null, exec_lo, neg(0xaf123456) ; encoding: [0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfc,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f32 v5, vcc_lo, -1, -exec_hi, -src_scc ; encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] -# W64: v_div_scale_f32 v5, vcc, -1, -exec_hi, -src_scc ; encoding: [0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x6a,0xfc,0xd6,0xc1,0xfe,0xf4,0xc3 - -# W32: v_div_scale_f32 v5, vcc_lo, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] -# W64: v_div_scale_f32 v5, vcc, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x6a,0xfc,0xd6,0xf0,0xfa,0xc0,0x4b - -# W32: v_div_scale_f32 v5, vcc_lo, -src_scc, vcc_lo, -1 mul:4 ; encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] -# W64: v_div_scale_f32 v5, vcc, -src_scc, vcc_lo, -1 mul:4 ; encoding: [0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x6a,0xfc,0xd6,0xfd,0xd4,0x04,0x33 - -# W32: v_div_scale_f32 v255, vcc_lo, neg(0xaf123456), -vcc_hi, null clamp div:2 ; 
encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f32 v255, vcc, neg(0xaf123456), -vcc_hi, null clamp div:2 ; encoding: [0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0xea,0xfc,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f64 v[5:6], vcc_lo, v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] -# W64: v_div_scale_f64 v[5:6], vcc, v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04] -0x05,0x6a,0xfd,0xd6,0x01,0x05,0x0e,0x04 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] -# W64: v_div_scale_f64 v[5:6], vcc, v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00] -0x05,0x6a,0xfd,0xd6,0xfe,0xfd,0x1b,0x00 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] -# W64: v_div_scale_f64 v[5:6], vcc, s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07] -0x05,0x6a,0xfd,0xd6,0x02,0x08,0xf8,0x07 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -s[104:105], s[104:105], -s[104:105] ; encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] -# W64: v_div_scale_f64 v[5:6], vcc, -s[104:105], s[104:105], -s[104:105] ; encoding: [0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1] -0x05,0x6a,0xfd,0xd6,0x68,0xd0,0xa0,0xa1 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, vcc, -ttmp[14:15], -ttmp[14:15] ; encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] -# W64: v_div_scale_f64 v[5:6], vcc, vcc, -ttmp[14:15], -ttmp[14:15] ; encoding: [0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1] -0x05,0x6a,0xfd,0xd6,0x6a,0xf4,0xe8,0xc1 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -ttmp[14:15], 0xaf123456, null ; encoding: [0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f64 v[5:6], vcc, -ttmp[14:15], 0xaf123456, null ; encoding: 
[0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfd,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -exec, -src_scc, -exec ; encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] -# W64: v_div_scale_f64 v[5:6], vcc, -exec, -src_scc, -exec ; encoding: [0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1] -0x05,0x6a,0xfd,0xd6,0x7e,0xfa,0xf9,0xe1 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, null, 0.5, vcc ; encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] -# W64: v_div_scale_f64 v[5:6], vcc, null, 0.5, vcc ; encoding: [0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01] -0x05,0x6a,0xfd,0xd6,0x7c,0xe0,0xa9,0x01 - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -1, -1, 0xaf123456 ; encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f64 v[5:6], vcc, -1, -1, 0xaf123456 ; encoding: [0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x6a,0xfd,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf - -# W32: v_div_scale_f64 v[5:6], vcc_lo, 0.5, null, -src_scc mul:2 ; encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] -# W64: v_div_scale_f64 v[5:6], vcc, 0.5, null, -src_scc mul:2 ; encoding: [0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b] -0x05,0x6a,0xfd,0xd6,0xf0,0xf8,0xf4,0x8b - -# W32: v_div_scale_f64 v[5:6], vcc_lo, -src_scc, -exec, 0.5 mul:4 ; encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] -# W64: v_div_scale_f64 v[5:6], vcc, -src_scc, -exec, 0.5 mul:4 ; encoding: [0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73] -0x05,0x6a,0xfd,0xd6,0xfd,0xfc,0xc0,0x73 - -# W32: v_div_scale_f64 v[254:255], vcc_lo, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -# W64: v_div_scale_f64 v[254:255], vcc, 0xaf123456, -vcc, -1 clamp div:2 ; encoding: [0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -0xfe,0xea,0xfd,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf - -# GFX11: v_dot2_bf16_bf16 v5, v1, v2, s3 ; encoding: 
[0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x67,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_dot2_bf16_bf16 v5, v255, v255, s105 ; encoding: [0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01] -0x05,0x00,0x67,0xd6,0xff,0xff,0xa7,0x01 - -# GFX11: v_dot2_bf16_bf16 v5, s1, s2, v3 ; encoding: [0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04] -0x05,0x00,0x67,0xd6,0x01,0x04,0x0c,0x04 - -# GFX11: v_dot2_bf16_bf16 v5, s105, s105, m0 ; encoding: [0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01] -0x05,0x00,0x67,0xd6,0x69,0xd2,0xf4,0x01 - -# GFX11: v_dot2_bf16_bf16 v5, vcc_lo, ttmp15, v255 ; encoding: [0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07] -0x05,0x00,0x67,0xd6,0x6a,0xf6,0xfc,0x07 - -# GFX11: v_dot2_bf16_bf16 v5, vcc_hi, 0xfe0b, vcc_hi ; encoding: [0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x67,0xd6,0x6b,0xfe,0xad,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_bf16_bf16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x67,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_dot2_bf16_bf16 v5, |m0|, -1, -vcc_lo ; encoding: [0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81] -0x05,0x01,0x67,0xd6,0x7d,0x82,0xa9,0x81 - -# GFX11: v_dot2_bf16_bf16 v5, -|exec_lo|, null, -|0xfe0b| ; encoding: [0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00] -0x05,0x05,0x67,0xd6,0x7e,0xf8,0xfc,0xa3,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_bf16_bf16 v5, -|exec_hi|, -|exec_lo|, -|exec_lo| ; encoding: [0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1] -0x05,0x07,0x67,0xd6,0x7f,0xfc,0xf8,0xe1 - -# GFX11: v_dot2_bf16_bf16 v5, null, -exec_hi, |src_scc| ; encoding: [0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43] -0x05,0x04,0x67,0xd6,0x7c,0xfe,0xf4,0x43 - -# GFX11: v_dot2_bf16_bf16 v5, -1, -|m0|, -|exec_hi| ; encoding: [0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1] -0x05,0x06,0x67,0xd6,0xc1,0xfa,0xfc,0xc1 - -# GFX11: v_dot2_bf16_bf16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x67,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: 
v_dot2_bf16_bf16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0x43,0x67,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_f16_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x66,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_dot2_f16_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x66,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_dot2_f16_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x66,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_dot2_f16_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x66,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_dot2_f16_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x66,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_dot2_f16_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x66,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_f16_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x66,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_dot2_f16_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x66,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_dot2_f16_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x66,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_dot2_f16_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x66,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_dot2_f16_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x04,0x66,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 - -# GFX11: v_dot2_f16_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: 
[0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x66,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_dot2_f16_f16 v5, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x00,0x66,0xd6,0xf0,0xfa,0xc0,0x43 - -# GFX11: v_dot2_f16_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x66,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: v_dot2_f16_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] ; encoding: [0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0x43,0x66,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, null ; encoding: [0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00] 
-0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_exp_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_exp_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, null ; encoding: 
[0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_exp_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_exp_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_floor_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00] 
-0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, null ; encoding: [0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_floor_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_floor_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, 
exec_hi ; encoding: [0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, null ; encoding: [0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_floor_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_floor_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_floor_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], -1 ; encoding: 
[0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_floor_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_floor_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_floor_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_dx9_zero_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x09,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_fma_dx9_zero_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x09,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x09,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x09,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x09,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_fma_dx9_zero_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x09,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_dx9_zero_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x09,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_fma_dx9_zero_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x09,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x09,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_fma_dx9_zero_f32 v5, 
-|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x09,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_fma_dx9_zero_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x09,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_dx9_zero_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x09,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_fma_dx9_zero_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x09,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_fma_dx9_zero_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x09,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_fma_dx9_zero_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x09,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x48,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_fma_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x48,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_fma_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x48,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_fma_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x48,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_fma_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x48,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_fma_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x48,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fma_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1] 
-0x05,0x07,0x48,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_fma_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x48,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_fma_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x48,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_fma_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x48,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_fma_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -0x05,0x7c,0x48,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fma_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x0e,0x48,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_fma_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43] -0x05,0x10,0x48,0xd6,0xf0,0xfa,0xc0,0x43 - -# GFX11: v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23] -0x05,0x22,0x48,0xd6,0xfd,0xd4,0x04,0x23 - -# GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fma_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_fma_f32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x13,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_fma_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x13,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_fma_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x13,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_fma_f32 v5, vcc_lo, ttmp15, v3 ; encoding: 
[0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x13,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_fma_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x13,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x13,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_fma_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x13,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_fma_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x13,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_fma_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x13,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_fma_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x13,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x13,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_fma_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x13,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_fma_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x13,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_fma_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x13,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f64 v[5:6], v[1:2], v[2:3], v[3:4] ; encoding: [0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04] -0x05,0x00,0x14,0xd6,0x01,0x05,0x0e,0x04 - -# GFX11: v_fma_f64 v[5:6], v[254:255], v[254:255], s[6:7] ; encoding: [0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00] -0x05,0x00,0x14,0xd6,0xfe,0xfd,0x1b,0x00 - -# GFX11: 
v_fma_f64 v[5:6], s[2:3], s[4:5], v[254:255] ; encoding: [0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07] -0x05,0x00,0x14,0xd6,0x02,0x08,0xf8,0x07 - -# GFX11: v_fma_f64 v[5:6], -|s[104:105]|, s[104:105], -|s[104:105]| ; encoding: [0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1] -0x05,0x05,0x14,0xd6,0x68,0xd0,0xa0,0xa1 - -# GFX11: v_fma_f64 v[5:6], vcc, -|ttmp[14:15]|, -|ttmp[14:15]| ; encoding: [0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1] -0x05,0x06,0x14,0xd6,0x6a,0xf4,0xe8,0xc1 - -# GFX11: v_fma_f64 v[5:6], -|ttmp[14:15]|, 0xaf123456, null ; encoding: [0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf] -0x05,0x01,0x14,0xd6,0x7a,0xfe,0xf1,0x21,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f64 v[5:6], -|exec|, -|src_scc|, -|exec| ; encoding: [0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1] -0x05,0x07,0x14,0xd6,0x7e,0xfa,0xf9,0xe1 - -# GFX11: v_fma_f64 v[5:6], null, 0.5, vcc ; encoding: [0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01] -0x05,0x00,0x14,0xd6,0x7c,0xe0,0xa9,0x01 - -# GFX11: v_fma_f64 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x14,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_fma_f64 v[5:6], 0.5, null, -|src_scc| mul:2 ; encoding: [0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b] -0x05,0x04,0x14,0xd6,0xf0,0xf8,0xf4,0x8b - -# GFX11: v_fma_f64 v[5:6], -|src_scc|, -|exec|, 0.5 mul:4 ; encoding: [0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73] -0x05,0x03,0x14,0xd6,0xfd,0xfc,0xc0,0x73 - -# GFX11: v_fma_f64 v[254:255], 0xaf123456, -|vcc|, -1 clamp div:2 ; encoding: [0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf] -0xfe,0x82,0x14,0xd6,0xff,0xd4,0x04,0x5b,0x56,0x34,0x12,0xaf - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, s1, s2 ; 
encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - -# 
GFX11: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_fmac_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: 
v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_fmac_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_fmac_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_fmac_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_fmac_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_fmac_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_fmac_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_fmac_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48] 
-0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_fmac_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_fmac_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - -# GFX11: v_fract_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, null ; encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_fract_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: 
v_fract_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_fract_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, null ; encoding: [0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, -1 ; encoding: 
[0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_fract_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_fract_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_fract_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_fract_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_fract_f64_e64 v[5:6], -|src_scc| mul:4 ; 
encoding: [0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_fract_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_frexp_exp_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: 
v_frexp_exp_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] -0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00] 
-0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] -0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf - -# GFX11: v_frexp_exp_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, exec ; encoding: [0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, null ; encoding: [0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, 0.5 ; encoding: 
[0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20] -0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20 - -# GFX11: v_frexp_exp_i32_f64_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_frexp_mant_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, null ; encoding: [0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, -1 ; encoding: 
[0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_frexp_mant_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, null ; encoding: 
[0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_frexp_mant_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_frexp_mant_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_frexp_mant_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00 - -# 
GFX11: v_frexp_mant_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_frexp_mant_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_ldexp_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_ldexp_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_ldexp_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_ldexp_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_ldexp_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_ldexp_f16_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, null, exec_lo ; encoding: 
[0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_ldexp_f16_e64 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08] -0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08 - -# GFX11: v_ldexp_f16_e64 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10] -0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10 - -# GFX11: v_ldexp_f16_e64 v255, -|0xfe0b|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x1c,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_ldexp_f32 v5, v255, v255 ; encoding: [0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00] -0x05,0x00,0x1c,0xd7,0xff,0xff,0x03,0x00 - -# GFX11: v_ldexp_f32 v5, s1, s2 ; encoding: [0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x01,0x04,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, s105, s105 ; encoding: [0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x69,0xd2,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x6a,0xf6,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1c,0xd7,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_ldexp_f32 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1c,0xd7,0x7b,0xfa,0x01,0x00 - -# GFX11: v_ldexp_f32 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1c,0xd7,0x7d,0xe0,0x01,0x00 - -# GFX11: v_ldexp_f32 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1c,0xd7,0x7e,0x82,0x01,0x00 - -# GFX11: v_ldexp_f32 v5, 
exec_hi, null ; encoding: [0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x7f,0xf8,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, null, exec_lo ; encoding: [0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1c,0xd7,0x7c,0xfc,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1c,0xd7,0xc1,0xfe,0x00,0x00 - -# GFX11: v_ldexp_f32 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08] -0x05,0x00,0x1c,0xd7,0xf0,0xfa,0x00,0x08 - -# GFX11: v_ldexp_f32 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10] -0x05,0x00,0x1c,0xd7,0xfd,0xd4,0x00,0x10 - -# GFX11: v_ldexp_f32 v255, -|0xaf123456|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0x1c,0xd7,0xff,0xd6,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_ldexp_f64 v[5:6], v[1:2], v2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00] -0x05,0x00,0x2b,0xd7,0x01,0x05,0x02,0x00 - -# GFX11: v_ldexp_f64 v[5:6], v[1:2], v255 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00] -0x05,0x00,0x2b,0xd7,0x01,0xff,0x03,0x00 - -# GFX11: v_ldexp_f64 v[5:6], v[1:2], s2 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x01,0x05,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], v[1:2], s105 ; encoding: [0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x01,0xd3,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], v[254:255], ttmp15 ; encoding: [0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00] -0x05,0x00,0x2b,0xd7,0xfe,0xf7,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], s[2:3], vcc_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x02,0xd6,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], s[104:105], vcc_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x68,0xd4,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], vcc, m0 ; encoding: [0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x6a,0xfa,0x00,0x00 - -# GFX11: 
v_ldexp_f64 v[5:6], ttmp[14:15], exec_hi ; encoding: [0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x7a,0xfe,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], exec, exec_lo ; encoding: [0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x7e,0xfc,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], null, null ; encoding: [0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00] -0x05,0x00,0x2b,0xd7,0x7c,0xf8,0x00,0x00 - -# GFX11: v_ldexp_f64 v[5:6], -1, -1 ; encoding: [0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00] -0x05,0x00,0x2b,0xd7,0xc1,0x82,0x01,0x00 - -# GFX11: v_ldexp_f64 v[5:6], 0.5, 0.5 mul:2 ; encoding: [0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08] -0x05,0x00,0x2b,0xd7,0xf0,0xe0,0x01,0x08 - -# GFX11: v_ldexp_f64 v[5:6], -|src_scc|, src_scc mul:4 ; encoding: [0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30] -0x05,0x01,0x2b,0xd7,0xfd,0xfa,0x01,0x30 - -# GFX11: v_ldexp_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x2b,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_lerp_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x15,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_lerp_u8 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x15,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_lerp_u8 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x15,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_lerp_u8 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x15,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_lerp_u8 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x15,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_lerp_u8 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x15,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_lerp_u8 v5, ttmp15, src_scc, ttmp15 ; encoding: 
[0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x15,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_lerp_u8 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x15,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_lerp_u8 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x15,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_lerp_u8 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x15,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_lerp_u8 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x15,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_lerp_u8 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x15,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_lerp_u8 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03 +# GFX11: v_lerp_u8 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x15,0xd6,0xf0,0xfa,0xc0,0x03 # GFX11: v_lerp_u8 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03] 0x05,0x00,0x15,0xd6,0xfd,0xd4,0x04,0x03 @@ -4874,96 +2136,6 @@ # GFX11: v_lerp_u8 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x15,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_log_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, vcc_lo ; 
encoding: [0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, null ; encoding: [0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_log_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_log_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_log_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: 
v_log_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, null ; encoding: [0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_log_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_log_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - # GFX11: v_lshl_add_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x46,0xd6,0x01,0x05,0x0e,0x00 @@ -5099,51 +2271,6 @@ # GFX11: v_lshlrev_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x38,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_lshlrev_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: 
v_lshlrev_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_lshlrev_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_lshlrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 
-0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_lshlrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x3c,0xd7,0x01,0xd5,0x00,0x00 @@ -5216,51 +2343,6 @@ # GFX11: v_lshrrev_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x39,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_lshrrev_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_lshrrev_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: 
v_lshrrev_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_lshrrev_b64 v[5:6], v1, vcc ; encoding: [0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00] 0x05,0x00,0x3d,0xd7,0x01,0xd5,0x00,0x00 @@ -5946,96 +3028,6 @@ # GFX11: v_max3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x1e,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_max_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_max_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_max_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_f16_e64 v5, m0, 0.5 ; encoding: 
[0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_max_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_max_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: 
v_max_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_max_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_max_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_max_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x2a,0xd7,0x01,0x05,0x02,0x00 @@ -6117,51 +3109,6 @@ # GFX11: v_max_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x0a,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_max_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_max_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_max_i32_e64 v5, s1, s2 ; encoding: 
[0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_max_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_max_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_max_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_max_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x09,0xd7,0x01,0x05,0x02,0x00 @@ -6207,51 +3154,6 @@ # GFX11: v_max_u16 
v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_max_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_max_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_max_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_max_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_max_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_max_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_max_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_max_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_max_u32_e64 
v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_max_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00 @@ -7062,96 +3964,6 @@ # GFX11: v_min3_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x1b,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_min_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_min_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_min_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_min_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_min_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_min_f16_e64 v5, |exec_hi|, null ; encoding: 
[0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_min_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_min_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_min_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_min_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_min_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: 
v_min_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_min_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_min_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_min_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_min_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_min_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x29,0xd7,0x01,0x05,0x02,0x00 @@ -7233,51 +4045,6 @@ # GFX11: v_min_i16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x0c,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_min_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_min_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_min_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, vcc_lo, ttmp15 ; encoding: 
[0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_min_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_min_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_min_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_min_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_min_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_min_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_min_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0b,0xd7,0x01,0x05,0x02,0x00 @@ -7323,51 +4090,6 @@ # GFX11: v_min_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_min_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] 
-0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_min_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_min_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_min_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_min_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_min_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_min_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_min_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_min_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 
-0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00 @@ -7548,114 +4270,6 @@ # GFX11: v_minmax_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x63,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_mov_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, v255 ; encoding: [0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, s1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, s105 ; encoding: [0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, null ; encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, 
0.5 ; encoding: [0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_mov_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, null ; encoding: [0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00] 
-0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_movrels_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_movrels_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] -0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_movrelsd_2_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_movrelsd_2_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] -0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_movrelsd_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] -0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00 - # GFX11: v_mqsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] ; encoding: [0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01] 0x05,0x00,0x3b,0xd6,0x01,0x05,0xea,0x01 @@ -7791,141 +4405,6 @@ # GFX11: v_msad_u8 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x80,0x39,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_mul_dx9_zero_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, 
s1, s2 ; encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_dx9_zero_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_mul_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_mul_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - -# 
GFX11: v_mul_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_mul_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_mul_f16_e64 
v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48] 
-0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_mul_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_mul_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_mul_f64 v[5:6], v[1:2], v[2:3] ; encoding: [0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x28,0xd7,0x01,0x05,0x02,0x00 @@ -8007,51 +4486,6 @@ # GFX11: v_mul_hi_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x00,0x2e,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_i32_i24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_hi_i32_i24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, exec_lo, -1 ; encoding: 
[0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_mul_hi_u32 v5, v1, v2 ; encoding: [0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x2d,0xd7,0x01,0x05,0x02,0x00 @@ -8097,96 +4531,6 @@ # GFX11: v_mul_hi_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x00,0x2d,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_mul_hi_u32_u24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, vcc_lo, ttmp15 ; encoding: 
[0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_hi_u32_u24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_i32_i24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00] 
-0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_i32_i24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_mul_lo_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x05,0xd7,0x01,0x05,0x02,0x00 @@ -8277,51 +4621,6 @@ # 
GFX11: v_mul_lo_u32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0x00,0x2c,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_mul_u32_u24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_mul_u32_u24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, 0.5, m0 ; encoding: 
[0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_mul_u32_u24_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_mullit_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x18,0xd6,0x01,0x05,0x0e,0x00 @@ -8331,134 +4630,41 @@ # GFX11: v_mullit_f32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x18,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_mullit_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_mullit_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] -0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1 - -# GFX11: v_mullit_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_mullit_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] -0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1 - -# GFX11: v_mullit_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] -0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf - -# GFX11: 
v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] -0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3 - -# GFX11: v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] -0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b - -# GFX11: v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] -0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33 - -# GFX11: v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] -0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf - -# GFX11: v_nop ; encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00] -0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, v1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, v255 ; encoding: [0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, s1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, s105 ; encoding: [0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, m0 ; encoding: [0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, exec_hi ; encoding: 
[0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, null ; encoding: [0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, -1 ; encoding: [0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, 0x3800 -0x05,0x00,0xe9,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_not_b16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_not_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_not_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_not_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_not_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x18,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_not_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x18,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_not_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x18,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_not_b32_e64 v5, 
ttmp15 ; encoding: [0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1] +0x05,0x07,0x18,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_not_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x18,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_not_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x01,0x18,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_not_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1] +0x05,0x05,0x18,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_not_b32_e64 v5, null ; encoding: [0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, null, exec_lo, -|0xaf123456| ; encoding: [0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf] +0x05,0x04,0x18,0xd6,0x7c,0xfc,0xfc,0x83,0x56,0x34,0x12,0xaf -# GFX11: v_not_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3] +0x05,0x06,0x18,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_not_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b] +0x05,0x00,0x18,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_not_b32_e64 v5, src_scc ; encoding: 
[0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00 +# GFX11: v_mullit_f32 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33] +0x05,0x02,0x18,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_not_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_mullit_f32 v255, -|0xaf123456|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf] +0xff,0x83,0x18,0xd6,0xff,0xd6,0xf0,0x79,0x56,0x34,0x12,0xaf # GFX11: v_or3_b32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x58,0xd6,0x01,0x05,0x0e,0x00 @@ -8550,51 +4756,6 @@ # GFX11: v_or_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x63,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_or_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_or_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_or_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_or_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_or_b32_e64 v5, m0, 0.5 ; encoding: 
[0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_or_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_or_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_or_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_or_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_pack_b32_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x11,0xd7,0x01,0x05,0x02,0x00 @@ -8716,544 +4877,118 @@ 0x05,0x00,0x5b,0xd6,0x01,0x83,0xf5,0x03 # GFX11: v_permlane16_b32 v5, v1, 0.5, 0.5 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xe1,0xc1,0x03] -0x05,0x00,0x5b,0xd6,0x01,0xe1,0xc1,0x03 - -# GFX11: v_permlane16_b32 v5, v1, src_scc, -1 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0x05,0x03] -0x05,0x00,0x5b,0xd6,0x01,0xfb,0x05,0x03 - -# GFX11: v_permlane16_b32 v255, v255, 0xaf123456, null ; encoding: [0xff,0x00,0x5b,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x5b,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] -0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00 - -# GFX11: v_permlanex16_b32 v5, v1, s105, s105 ; encoding: 
[0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01 - -# GFX11: v_permlanex16_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01 - -# GFX11: v_permlanex16_b32 v5, v1, vcc_hi, exec_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xfc,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xd7,0xfc,0x01 - -# GFX11: v_permlanex16_b32 v5, v1, vcc_lo, exec_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf8,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf8,0x01 - -# GFX11: v_permlanex16_b32 v5, v1, m0, m0 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xf4,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xfb,0xf4,0x01 - -# GFX11: v_permlanex16_b32 v5, v1, exec_hi, vcc_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xac,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xff,0xac,0x01 - -# GFX11: v_permlanex16_b32 v5, v1, exec_lo, vcc_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xa8,0x01] -0x05,0x00,0x5c,0xd6,0x01,0xfd,0xa8,0x01 - -# GFX11: v_permlanex16_b32 v5, v1, null, 0xaf123456 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x5c,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_permlanex16_b32 v5, v1, -1, src_scc ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0xf5,0x03] -0x05,0x00,0x5c,0xd6,0x01,0x83,0xf5,0x03 - -# GFX11: v_permlanex16_b32 v5, v1, 0.5, 0.5 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xe1,0xc1,0x03] -0x05,0x00,0x5c,0xd6,0x01,0xe1,0xc1,0x03 - -# GFX11: v_permlanex16_b32 v5, v1, src_scc, -1 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0x05,0x03] -0x05,0x00,0x5c,0xd6,0x01,0xfb,0x05,0x03 - -# GFX11: v_permlanex16_b32 v255, v255, 0xaf123456, null ; encoding: [0xff,0x00,0x5c,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf] -0xff,0x00,0x5c,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_pipeflush ; encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00] -0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] ; encoding: 
[0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] -0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] -0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] -0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] -0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] ; encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] -0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] -0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] ; encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] -0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] ; encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] -0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null ; encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] -0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec ; encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] -0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], null, null, vcc ; encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] -0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01 - -# GFX11: v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc ; encoding: [0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] -0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03 - -# GFX11: 
v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 ; encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] -0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03 - -# GFX11: v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp ; encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_rcp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, null ; encoding: [0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_rcp_f16_e64 v5, 0.5 mul:2 ; encoding: 
[0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_rcp_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_rcp_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, null ; encoding: [0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: 
v_rcp_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_rcp_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_rcp_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_rcp_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_rcp_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_rcp_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_rcp_f64_e64 
v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_rcp_iflag_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, null ; encoding: [0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_rcp_iflag_f32_e64 v5, src_scc mul:4 ; 
encoding: [0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_rcp_iflag_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_readlane_b32 s5, v1, s2 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] -0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00 - -# GFX11: v_readlane_b32 s5, v1, s105 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] -0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00 - -# GFX11: v_readlane_b32 s105, v1, ttmp15 ; encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] -0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00 - -# GFX11: v_readlane_b32 vcc_lo, v1, vcc_hi ; encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] -0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00 - -# GFX11: v_readlane_b32 vcc_hi, v1, vcc_lo ; encoding: [0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] -0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00 - -# GFX11: v_readlane_b32 ttmp15, v1, m0 ; encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] -0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00 - -# GFX11: v_readlane_b32 null, v255, null ; encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] -0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, vcc_hi ; encoding: 
[0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, null ; encoding: [0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_rndne_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_rndne_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00] 
-0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, null ; encoding: [0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_rndne_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_rndne_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_rndne_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00] 
-0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_rndne_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_rndne_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_rndne_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - -# GFX11: v_rsq_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_rsq_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_rsq_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_rsq_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_rsq_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00 +0x05,0x00,0x5b,0xd6,0x01,0xe1,0xc1,0x03 -# GFX11: v_rsq_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00] 
-0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v5, v1, src_scc, -1 ; encoding: [0x05,0x00,0x5b,0xd6,0x01,0xfb,0x05,0x03] +0x05,0x00,0x5b,0xd6,0x01,0xfb,0x05,0x03 -# GFX11: v_rsq_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_permlane16_b32 v255, v255, 0xaf123456, null ; encoding: [0xff,0x00,0x5b,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x5b,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00] +0x05,0x00,0x5c,0xd6,0x01,0x05,0x0c,0x00 -# GFX11: v_rsq_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xd3,0xa4,0x01 -# GFX11: v_rsq_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xf7,0xec,0x01 -# GFX11: v_rsq_f16_e64 v5, null ; encoding: [0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, vcc_hi, exec_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd7,0xfc,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xd7,0xfc,0x01 -# GFX11: v_rsq_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, vcc_lo, exec_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf8,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xd5,0xf8,0x01 -# GFX11: v_rsq_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08 
+# GFX11: v_permlanex16_b32 v5, v1, m0, m0 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0xf4,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xfb,0xf4,0x01 -# GFX11: v_rsq_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_permlanex16_b32 v5, v1, exec_hi, vcc_hi ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xff,0xac,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xff,0xac,0x01 -# GFX11: v_rsq_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, exec_lo, vcc_lo ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfd,0xa8,0x01] +0x05,0x00,0x5c,0xd6,0x01,0xfd,0xa8,0x01 -# GFX11: v_rsq_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, null, 0xaf123456 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x5c,0xd6,0x01,0xf9,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, -1, src_scc ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0x83,0xf5,0x03] +0x05,0x00,0x5c,0xd6,0x01,0x83,0xf5,0x03 -# GFX11: v_rsq_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, 0.5, 0.5 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xe1,0xc1,0x03] +0x05,0x00,0x5c,0xd6,0x01,0xe1,0xc1,0x03 -# GFX11: v_rsq_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v5, v1, src_scc, -1 ; encoding: [0x05,0x00,0x5c,0xd6,0x01,0xfb,0x05,0x03] +0x05,0x00,0x5c,0xd6,0x01,0xfb,0x05,0x03 -# GFX11: v_rsq_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00] 
-0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_permlanex16_b32 v255, v255, 0xaf123456, null ; encoding: [0xff,0x00,0x5c,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf] +0xff,0x00,0x5c,0xd6,0xff,0xff,0xf1,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01] +0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01 -# GFX11: v_rsq_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], v255, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01] +0x05,0x00,0x3a,0xd6,0x01,0xff,0xeb,0x01 -# GFX11: v_rsq_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s2, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01] +0x05,0x00,0x3a,0xd6,0x01,0x05,0xe8,0x01 -# GFX11: v_rsq_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[1:2], s105, ttmp[14:15] ; encoding: [0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01] +0x05,0x00,0x3a,0xd6,0x01,0xd3,0xe8,0x01 -# GFX11: v_rsq_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], v[254:255], ttmp15, s[6:7] ; encoding: [0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00] +0x05,0x00,0x3a,0xd6,0xfe,0xf7,0x18,0x00 -# GFX11: v_rsq_f32_e64 v5, null ; encoding: [0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], s[2:3], vcc_hi, v[3:4] ; encoding: [0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04] +0x05,0x00,0x3a,0xd6,0x02,0xd6,0x0c,0x04 -# GFX11: v_rsq_f32_e64 v5, -1 ; encoding: 
[0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], s[104:105], vcc_lo, s[104:105] ; encoding: [0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01] +0x05,0x00,0x3a,0xd6,0x68,0xd4,0xa0,0x01 -# GFX11: v_rsq_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_qsad_pk_u16_u8 v[5:6], vcc, m0, v[254:255] ; encoding: [0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07] +0x05,0x00,0x3a,0xd6,0x6a,0xfa,0xf8,0x07 -# GFX11: v_rsq_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_qsad_pk_u16_u8 v[5:6], ttmp[14:15], exec_hi, null ; encoding: [0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01] +0x05,0x00,0x3a,0xd6,0x7a,0xfe,0xf0,0x01 -# GFX11: v_rsq_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_qsad_pk_u16_u8 v[5:6], exec, exec_lo, exec ; encoding: [0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01] +0x05,0x00,0x3a,0xd6,0x7e,0xfc,0xf8,0x01 -# GFX11: v_rsq_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], null, null, vcc ; encoding: [0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01] +0x05,0x00,0x3a,0xd6,0x7c,0xf8,0xa8,0x01 -# GFX11: v_rsq_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], -1, -1, 0xaf123456 ; encoding: [0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x3a,0xd6,0xc1,0x82,0xfd,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], 0.5, 0.5, src_scc ; encoding: 
[0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03] +0x05,0x00,0x3a,0xd6,0xf0,0xe0,0xf5,0x03 -# GFX11: v_rsq_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[5:6], src_scc, src_scc, 0.5 ; encoding: [0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03] +0x05,0x00,0x3a,0xd6,0xfd,0xfa,0xc1,0x03 -# GFX11: v_rsq_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_qsad_pk_u16_u8 v[254:255], 0xaf123456, 0xaf123456, -1 clamp ; encoding: [0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x3a,0xd6,0xff,0xfe,0x05,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_rsq_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_readlane_b32 s5, v1, s2 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0x05,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_readlane_b32 s5, v1, s105 ; encoding: [0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00] +0x05,0x00,0x60,0xd7,0x01,0xd3,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_readlane_b32 s105, v1, ttmp15 ; encoding: [0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00] +0x69,0x00,0x60,0xd7,0x01,0xf7,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_readlane_b32 vcc_lo, v1, vcc_hi ; encoding: [0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00] +0x6a,0x00,0x60,0xd7,0x01,0xd7,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_readlane_b32 vcc_hi, v1, vcc_lo ; encoding: 
[0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00] +0x6b,0x00,0x60,0xd7,0x01,0xd5,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30 +# GFX11: v_readlane_b32 ttmp15, v1, m0 ; encoding: [0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00] +0x7b,0x00,0x60,0xd7,0x01,0xfb,0x00,0x00 -# GFX11: v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_readlane_b32 null, v255, null ; encoding: [0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00] +0x7c,0x00,0x60,0xd7,0xff,0xf9,0x00,0x00 # GFX11: v_sad_hi_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x23,0xd6,0x01,0x05,0x0e,0x00 @@ -9315,441 +5050,125 @@ # GFX11: v_sad_u16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x24,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x24,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_sad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x24,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x24,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x24,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x24,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_sad_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x24,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x24,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_sad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x24,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x24,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_sad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x24,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x24,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_sad_u16 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x24,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x24,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_sad_u16 v5, -1, exec_hi, src_scc ; encoding: 
[0x05,0x00,0x24,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x24,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_sad_u16 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x24,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x24,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_sad_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x24,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x24,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_sad_u16 v255, 0xfe0b, vcc_hi, null clamp ; encoding: [0xff,0x80,0x24,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] -0xff,0x80,0x24,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sad_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_sad_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x25,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x25,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_sad_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x25,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_sad_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x25,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x25,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_sad_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x25,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_sad_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x25,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x25,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_sad_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x25,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x25,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_sad_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x25,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x25,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_sad_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x25,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x25,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_sad_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x25,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x25,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: 
v_sad_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x25,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x25,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_sad_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x25,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x25,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_sad_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x25,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x25,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_sad_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x25,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x25,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_sad_u32 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x25,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x80,0x25,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_sad_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00] -0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00 - -# GFX11: v_sad_u8 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01] -0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01 - -# GFX11: v_sad_u8 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01] -0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01 - -# GFX11: v_sad_u8 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01] -0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01 - -# GFX11: v_sad_u8 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04] -0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04 - -# GFX11: v_sad_u8 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] -0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf - -# GFX11: v_sad_u8 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01] -0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01 - -# GFX11: v_sad_u8 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01] -0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01 - -# GFX11: v_sad_u8 v5, exec_lo, -1, vcc_hi ; encoding: 
[0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01] -0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01 - -# GFX11: v_sad_u8 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01] -0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01 - -# GFX11: v_sad_u8 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] -0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf - -# GFX11: v_sad_u8 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03] -0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03 - -# GFX11: v_sad_u8 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03] -0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03 - -# GFX11: v_sad_u8 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03] -0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03 - -# GFX11: v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - -# GFX11: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: 
v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] -0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00 - -# GFX11: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] -0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00] 
-0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, null ; encoding: [0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_sin_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_sin_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, ttmp15 ; encoding: 
[0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, null ; encoding: [0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_sin_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_sin_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_sqrt_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: 
v_sqrt_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, null ; encoding: [0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_sqrt_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_sqrt_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_sqrt_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00 +# GFX11: v_sad_u16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x24,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x24,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_sqrt_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00 +# GFX11: v_sad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x24,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x24,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_sqrt_f32_e64 v5, s105 ; encoding: 
[0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00 +# GFX11: v_sad_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x24,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x24,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_sqrt_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_sad_u16 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x24,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x24,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_sqrt_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00 +# GFX11: v_sad_u16 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x24,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x24,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_sqrt_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00 +# GFX11: v_sad_u16 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x24,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x24,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_sqrt_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00 +# GFX11: v_sad_u16 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x24,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x24,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_sqrt_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_sad_u16 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x24,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x24,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_sqrt_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00 +# GFX11: v_sad_u16 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x24,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x24,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_sqrt_f32_e64 v5, null ; encoding: [0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00] 
-0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_sad_u16 v255, 0xfe0b, vcc_hi, null clamp ; encoding: [0xff,0x80,0x24,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00] +0xff,0x80,0x24,0xd6,0xff,0xd6,0xf0,0x01,0x0b,0xfe,0x00,0x00 -# GFX11: v_sqrt_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_sad_u32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x25,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_sqrt_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_sad_u32 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x25,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x25,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_sqrt_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10 +# GFX11: v_sad_u32 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x25,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x25,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_sqrt_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf +# GFX11: v_sad_u32 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x25,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x25,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_sqrt_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00 +# GFX11: v_sad_u32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x25,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x25,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_sqrt_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00 +# GFX11: v_sad_u32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x25,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x25,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_sqrt_f64_e64 v[5:6], s[2:3] ; 
encoding: [0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00 +# GFX11: v_sad_u32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x25,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x25,0xd6,0x7b,0xfa,0xed,0x01 -# GFX11: v_sqrt_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00 +# GFX11: v_sad_u32 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x25,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x25,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_sqrt_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00 +# GFX11: v_sad_u32 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x25,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x25,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_sqrt_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00 +# GFX11: v_sad_u32 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x25,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x25,0xd6,0x7f,0xf8,0xa8,0x01 -# GFX11: v_sqrt_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00 +# GFX11: v_sad_u32 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x25,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x25,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_sqrt_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00 +# GFX11: v_sad_u32 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x25,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x25,0xd6,0xc1,0xfe,0xf4,0x03 -# GFX11: v_sqrt_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00 +# GFX11: v_sad_u32 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x25,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x25,0xd6,0xf0,0xfa,0xc0,0x03 -# GFX11: v_sqrt_f64_e64 v[5:6], 0.5 mul:2 ; encoding: 
[0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08 +# GFX11: v_sad_u32 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x25,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x25,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30 +# GFX11: v_sad_u32 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x25,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x80,0x25,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf +# GFX11: v_sad_u8 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00] +0x05,0x00,0x22,0xd6,0x01,0x05,0x0e,0x00 -# W32: v_sub_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf +# GFX11: v_sad_u8 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01] +0x05,0x00,0x22,0xd6,0xff,0x05,0xa4,0x01 -# W32: v_sub_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] -0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00 +# GFX11: v_sad_u8 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01] +0x05,0x00,0x22,0xd6,0x01,0xfe,0xff,0x01 -# W32: v_sub_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] 
-0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00 +# GFX11: v_sad_u8 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01] +0x05,0x00,0x22,0xd6,0x69,0xd2,0xf8,0x01 -# W32: v_sub_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] -0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00 +# GFX11: v_sad_u8 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04] +0x05,0x00,0x22,0xd6,0x6a,0xf6,0x0c,0x04 -# W32: v_sub_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] -0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00 +# GFX11: v_sad_u8 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +0x05,0x00,0x22,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# W32: v_sub_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] -0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00 +# GFX11: v_sad_u8 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01] +0x05,0x00,0x22,0xd6,0x7b,0xfa,0xed,0x01 -# W32: v_sub_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] -0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00 +# GFX11: v_sad_u8 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01] +0x05,0x00,0x22,0xd6,0x7d,0xe0,0xf5,0x01 -# W32: v_sub_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, 
s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] -0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00 +# GFX11: v_sad_u8 v5, exec_lo, -1, vcc_hi ; encoding: [0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01] +0x05,0x00,0x22,0xd6,0x7e,0x82,0xad,0x01 -# W32: v_sub_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] -0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00 +# GFX11: v_sad_u8 v5, exec_hi, null, vcc_lo ; encoding: [0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01] +0x05,0x00,0x22,0xd6,0x7f,0xf8,0xa8,0x01 -# W32: v_sub_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] -# W64: v_sub_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] -0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00 +# GFX11: v_sad_u8 v5, null, exec_lo, 0xaf123456 ; encoding: [0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] +0x05,0x00,0x22,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf -# W32: v_sub_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] -# W64: v_sub_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] -0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01 +# GFX11: v_sad_u8 v5, -1, exec_hi, src_scc ; encoding: [0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03] +0x05,0x00,0x22,0xd6,0xc1,0xfe,0xf4,0x03 -# W32: v_sub_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] -# W64: v_sub_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] -0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01 +# GFX11: v_sad_u8 v5, 0.5, m0, 0.5 ; encoding: [0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03] +0x05,0x00,0x22,0xd6,0xf0,0xfa,0xc0,0x03 -# W32: v_sub_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: 
[0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] -# W64: v_sub_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] -0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01 +# GFX11: v_sad_u8 v5, src_scc, vcc_lo, -1 ; encoding: [0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03] +0x05,0x00,0x22,0xd6,0xfd,0xd4,0x04,0x03 -# GFX11: v_sub_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf +# GFX11: v_sad_u8 v255, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0x80,0x22,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf # W32: v_sub_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] # W64: v_sub_co_u32 v5, s[12:13], v1, v2 ; encoding: [0x05,0x0c,0x01,0xd7,0x01,0x05,0x02,0x00] @@ -9810,96 +5229,6 @@ # GFX11: v_sub_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0xfc,0x01,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_sub_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_sub_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_sub_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] 
-0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_sub_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_sub_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_sub_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_sub_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_sub_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_sub_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_sub_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_sub_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, vcc_lo, ttmp15 ; encoding: 
[0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_sub_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_sub_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_sub_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_sub_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_sub_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_sub_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_sub_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - # GFX11: v_sub_nc_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x0e,0xd7,0x01,0x05,0x02,0x00 @@ -10035,106 +5364,6 @@ # GFX11: v_sub_nc_u16 v255, 0xfe0b, vcc_hi op_sel:[0,0,1] clamp ; encoding: [0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0xc0,0x04,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 -# GFX11: v_sub_nc_u32_e64 v5, v1, v2 ; encoding: 
[0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_sub_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_sub_nc_u32_e64 v255, 0xaf123456, 
vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - -# W32: v_subrev_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] -0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf - -# W32: v_subrev_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] -0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] -0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] -0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] -0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] -0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, 
s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] -0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] -0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] -0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] -# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] -0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00 - -# W32: v_subrev_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] -# W64: v_subrev_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] -0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01 - -# W32: v_subrev_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] -# W64: v_subrev_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] -0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01 - -# W32: v_subrev_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] -# W64: v_subrev_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] -0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01 - -# GFX11: v_subrev_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] -0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf - 
# W32: v_subrev_co_u32 v5, s12, v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] # W64: v_subrev_co_u32 v5, s[12:13], v1, v2 ; encoding: [0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00] 0x05,0x0c,0x02,0xd7,0x01,0x05,0x02,0x00 @@ -10194,141 +5423,6 @@ # GFX11: v_subrev_co_u32 v255, null, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0xff,0xfc,0x02,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf -# GFX11: v_subrev_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_subrev_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_subrev_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] -0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_subrev_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_subrev_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_subrev_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00] 
-0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_subrev_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_subrev_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_subrev_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] -0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_subrev_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_subrev_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_subrev_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_subrev_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_subrev_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: 
v_subrev_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_subrev_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48] -0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48 - -# GFX11: v_subrev_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30] -0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30 - -# GFX11: v_subrev_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] -0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf - -# GFX11: v_subrev_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_subrev_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, m0, 
0.5 ; encoding: [0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_trig_preop_f64 v[5:6], v[1:2], v2 ; encoding: [0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x2f,0xd7,0x01,0x05,0x02,0x00 @@ -10374,132 +5468,6 @@ # GFX11: v_trig_preop_f64 v[254:255], 0xaf123456, 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf] 0xfe,0x80,0x2f,0xd7,0xff,0xfe,0x01,0x18,0x56,0x34,0x12,0xaf -# GFX11: v_trunc_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, s105 ; encoding: 
[0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, null ; encoding: [0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_trunc_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_trunc_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] -0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00] -0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00] 
-0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, null ; encoding: [0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_trunc_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10] -0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10 - -# GFX11: v_trunc_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] -0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf - -# GFX11: v_trunc_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00] -0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00] -0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00 - -# 
GFX11: v_trunc_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00] -0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00 - -# GFX11: v_trunc_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08] -0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08 - -# GFX11: v_trunc_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30] -0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30 - -# GFX11: v_trunc_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] -0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf - # GFX11: v_writelane_b32 v5, s1, s2 ; encoding: [0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00] 0x05,0x00,0x61,0xd7,0x01,0x04,0x00,0x00 @@ -10584,51 +5552,6 @@ # GFX11: v_xad_u32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] 0xff,0x00,0x45,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf -# GFX11: v_xnor_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_xnor_b32_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_xnor_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_xnor_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_xnor_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_xnor_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_xnor_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_xnor_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf - # GFX11: v_xor3_b32 v5, 
v1, v2, s3 ; encoding: [0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x40,0xd6,0x01,0x05,0x0e,0x00 @@ -10718,48 +5641,3 @@ # GFX11: v_xor_b16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0xff,0x00,0x64,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00] -0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00 - -# GFX11: v_xor_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00] -0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00 - -# GFX11: v_xor_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] -0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf - -# GFX11: v_xor_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00] -0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00 - -# GFX11: v_xor_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00] -0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00 - -# GFX11: v_xor_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00] -0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00 - -# GFX11: v_xor_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00] -0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00] -0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00 - -# 
GFX11: v_xor_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00] -0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00 - -# GFX11: v_xor_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00] -0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00 - -# GFX11: v_xor_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 2f9f85f130236..3dd7727a3dabd 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -1,5 +1,5 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s # GFX11: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -43,61 +43,6 @@ # GFX11: v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff - -# W32: v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 - -# W32: v_add_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 - -# GFX11: v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 - # W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -153,90 +98,6 @@ # GFX11: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_add_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_add_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 
fi:1 ; encoding: [0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - # GFX11: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -321,48 +182,6 @@ # GFX11: v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, 
v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_add_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -489,48 +308,6 @@ # GFX11: 
v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_and_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -615,48 +392,6 @@ # GFX11: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: 
v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, 
v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_ashrrev_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -867,216 +602,6 @@ # GFX11: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_bfrev_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff 
- -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cls_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cls_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cls_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 
-0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# 
GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_clz_i32_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - # W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] # W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff @@ -1138,193 +663,6 @@ # GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 - -# W32: 
v_cndmask_b32_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 - -# GFX11: v_cndmask_b32_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x01,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x01,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cndmask_b32_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x0d,0x30] -0x05,0x02,0x01,0xd5,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x0d,0x30 - -# GFX11: v_cndmask_b32_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x0d,0x30] -0x05,0x01,0x01,0xd5,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x0d,0x30 - -# GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# 
GFX11: v_cos_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cos_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cos_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cos_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cos_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cos_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_cos_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_cos_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] 
-0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -1493,4709 +831,1895 @@ # GFX11: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] 0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f16_i16_e64_dpp 
v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30] +0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30 -# GFX11: 
v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: 
v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_ror:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: 
v_cvt_pknorm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 
+0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
-0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 
+0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: 
v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: 
v_cvt_f32_ubyte3_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 
; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: 
v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# 
GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: 
v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_floor_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 +# GFX11: v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
-0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] 
+0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_i16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 +# GFX11: v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
-0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_i32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 +# GFX11: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 
+0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 
-0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: 
v_cvt_i32_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_i32_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_lshrrev_b16_e64_dpp v255, v255, v255 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] 
+0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
-0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] 
+0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 +# GFX11: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_norm_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 +# GFX11: v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, 
v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x1c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x1c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x1c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x1c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x1c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x1c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x1c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x1c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x1c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_max3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x1c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x1c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_max3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x1c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x1c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_max3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x1c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x1c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_norm_u16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 +# GFX11: v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x1c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x1c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
+0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] 
+0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 +# GFX11: v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX11: v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 
-0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: 
v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: 
v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30] -0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: 
v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 
-0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pknorm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 - -# 
GFX11: v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_ror:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_u16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_exp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_exp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] 
-0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_exp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_exp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 
-0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_exp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_exp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 
-0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: 
v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, 
v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_floor_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_floor_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_floor_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, 
v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_fma_f32_e64_dpp v5, 
-|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_fract_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# 
GFX11: v_fract_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_fract_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_fract_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_fract_f32_e64_dpp 
v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_fract_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_fract_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_fract_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 
row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] -0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: 
v_frexp_mant_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_frexp_mant_f16_e64_dpp v255, 
-|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: 
v_frexp_mant_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
-0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_log_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_log_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_log_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_log_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_log_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
-0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_log_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] 
-0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_lshlrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: 
v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] 
-0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_max3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_max3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x1c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x1c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x1c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x1c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, |v1|, v2, 
-ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x1c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x1c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x1c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x1c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x1c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x1c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x1c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x1c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_max3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x1c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x1c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_max3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x1c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x1c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_max3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x1c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x1c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
-0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max_f16_e64_dpp 
v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 
fi:1 ; encoding: [0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 
-0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_max_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 
-0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
-0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# 
GFX11: v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 
-0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x5e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x5e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x5e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x5e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x5e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x5e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x5e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x5e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: 
v_maxmin_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x5e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x5e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_maxmin_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x5e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x5e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_maxmin_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x5e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x5e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_maxmin_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x5e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x5e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: 
v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
-0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, 
v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - 
-# GFX11: v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_med3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x1f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x1f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x1f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x1f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x1f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x1f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x1f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x1f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 
-0x05,0x04,0x1f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x1f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x1f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_med3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x1f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x1f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_med3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x1f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x1f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x1f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x1f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp 
v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_min3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 
-0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x19,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x19,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x19,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x19,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x19,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x19,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x19,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x19,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x19,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x19,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x19,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x19,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_min3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x05,0x19,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x19,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_min3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x19,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x19,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x19,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x19,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: 
v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
-0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_min_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_min_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
-0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x5f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x5f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - 
-# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x5f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x5f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x5f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x5f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x5f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x5f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x5f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x5f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff - -# GFX11: v_minmax_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x5f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x5f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 - -# GFX11: v_minmax_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x5f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x5f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 - -# GFX11: v_minmax_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x87,0x5f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x5f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# 
GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] 
-0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 
-0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# 
GFX11: v_movreld_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 
-0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: 
v_movrels_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 
+0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: 
v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
+0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
-0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 
+0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x5e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x5e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x5e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x5e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x5e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x5e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x5e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x5e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX11: v_maxmin_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x5e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x5e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX11: v_maxmin_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x5e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x5e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX11: v_maxmin_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x5e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x5e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX11: v_maxmin_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x5e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x5e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
-0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 +# GFX11: v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 +# GFX11: v_maxmin_i32_e64_dpp v255, 
v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: 
v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 +# GFX11: v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 +# GFX11: v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: 
v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 +# GFX11: v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
+0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 
-0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 
+0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_med3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x1f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x1f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x1f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x1f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x1f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x1f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x1f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x1f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x1f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x1f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_med3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x1f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x1f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_med3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x1f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x1f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_med3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x1f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x1f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_med3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x1f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x1f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# 
GFX11: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: 
v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, null 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# 
GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_med3_u32_e64_dpp 
v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_min3_f32_e64_dpp v5, 
v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_min3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x19,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x19,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x19,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 
row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x19,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x19,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x19,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x19,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x19,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x19,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x04,0x19,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x19,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_min3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x19,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x19,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_min3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x19,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x19,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_min3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x19,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x19,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_min3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x19,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] 
+0xff,0x87,0x19,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX11: v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp 
div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX11: v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_min3_u32_e64_dpp 
v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_not_b16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_not_b16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] 
+0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_not_b16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_not_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 
-0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_not_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_not_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 
+0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_not_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX11: v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX11: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_or_b32_e64_dpp 
v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x5f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x5f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x5f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x5f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x5f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x5f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x5f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x5f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX11: v_minmax_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x5f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x5f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX11: v_minmax_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x5f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x5f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX11: v_minmax_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x5f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x5f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_or_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX11: v_minmax_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x5f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x5f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX11: 
v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: 
v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX11: 
v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX11: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX11: v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_rcp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
-0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] 
+0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_rcp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
-0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 
+0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 
-0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_rndne_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
-0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_rndne_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_rsq_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 
-0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:4 
row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX11: v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_rsq_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, 
v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 # GFX11: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -6254,381 +2778,116 @@ # GFX11: v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_sad_u16_e64_dpp v5, v1, 
v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff 
- -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# 
GFX11: v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 
-0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 - -# GFX11: v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 - -# GFX11: v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sin_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_sin_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_sin_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
-0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sin_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_sin_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_sin_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 
-0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: 
v_sqrt_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_sad_u16_e64_dpp v255, 
v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_sqrt_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, 
v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 +# GFX11: v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] 
-0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# W32: 
v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# W32: v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# W32: v_sub_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 +# GFX11: v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX11: v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 +# GFX11: v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 # W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -6685,90 +2944,6 @@ # GFX11: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 
-0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - # GFX11: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -6811,103 +2986,6 @@ # GFX11: v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff - -# W32: 
v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] 
-0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff - -# W32: v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64: v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64: v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 - -# GFX11: v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 - # W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -6963,216 +3041,6 @@ # GFX11: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: 
v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - -# GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - 
-# GFX11: v_trunc_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] -0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 - -# GFX11: v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] -0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 - # GFX11: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -7215,48 +3083,6 @@ # GFX11: v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_xnor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff @@ -7341,48 +3167,6 @@ # GFX11: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 - -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 - -# GFX11: v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 - # GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -7635,90 +3419,6 @@ # GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: 
v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| 
clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff - -# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] -0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 - -# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] -0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 - -# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] -0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 - # GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt new file mode 100644 index 0000000000000..19a5c36118337 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt @@ -0,0 +1,2605 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_bfrev_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xb8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xdc,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xdc,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
+0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa2,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa2,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cls_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 
+0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cls_i32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cls_i32_e64_dpp v255, v255 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xbb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_clz_i32_u32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_clz_i32_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xb9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cos_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cos_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] 
+0x05,0x00,0xe1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cos_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xe1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cos_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 
+0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cos_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cos_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xb6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cos_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xb6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] 
+0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# 
GFX11: v_ctz_i32_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xba,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: 
v_cvt_f16_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x8a,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: 
v_cvt_f16_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0xd1,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0xd0,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x8b,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x85,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
+0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x86,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x86,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x91,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; 
encoding: [0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x91,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x92,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x92,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x93,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x93,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x94,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x94,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp 
v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x8d,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0x8d,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xd3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_i16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x88,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_i32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x88,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_i32_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xea,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x8c,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0x8c,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xe3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v255, -|v255| row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0xe3,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xe4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0xe4,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: 
v_cvt_off_f32_i4_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 
mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x8e,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x8e,0xd5,0xfa,0x00,0x00,0x18,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xd2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_u16_f16_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd2,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x87,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x87,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xeb,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_exp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
+0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_exp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_exp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd8,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_exp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd8,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_exp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
+0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_exp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_exp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: 
v_floor_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_floor_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xdb,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xdb,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: 
v_floor_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_floor_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_floor_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_floor_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_fract_f16_e64_dpp v5, v1 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_fract_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_fract_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xdf,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xdf,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_fract_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_fract_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xda,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0xda,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xbf,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v255, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30] +0xff,0x01,0xbf,0xd5,0xfa,0x00,0x00,0x20,0xff,0x6f,0x0d,0x30 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd9,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd9,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: 
v_frexp_mant_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:2 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xc0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xc0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_log_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_log_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_log_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_log_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_log_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] 
+0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_log_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_log_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa7,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa7,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] 
+0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mov_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mov_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x81,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# 
GFX11: v_movreld_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_movreld_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_movreld_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xc2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_movrels_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_movrels_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xc3,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xc8,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_movrelsd_b32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xc4,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] 
+0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_not_b16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_not_b16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_not_b16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xe9,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_not_b32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] 
+0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_not_b32_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_not_b32_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_not_b32_e64_dpp v255, v255 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xb7,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd4,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd4,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] 
+0x05,0x00,0xaa,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xaa,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] 
+0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xab,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xab,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] 
+0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xde,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xde,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] 
+0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] 
+0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:2 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd6,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xd6,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xae,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xae,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0xe2,0xd5,0xfa,0x00,0x00,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sin_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_sin_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xe0,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_sin_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xe0,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] 
+0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sin_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_sin_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xb5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_sin_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xb5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] 
+0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xd5,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] 
+0xff,0x81,0xd5,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: 
v_sqrt_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xb3,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xb3,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: 
v_trunc_f16_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xdd,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xdd,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0xa1,0xd5,0xfa,0x00,0x00,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0xa1,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop2.txt new file mode 100644 index 0000000000000..69a7122e43831 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop2.txt @@ -0,0 +1,1698 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +0x05,0x0c,0x20,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +0x05,0x68,0x20,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff + +# W32: v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +0x05,0x6a,0x20,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 + +# W32: v_add_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +0x05,0x7a,0x20,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 + +# GFX11: v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +0xff,0xfc,0x20,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x32,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_add_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x32,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x32,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x03,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_add_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x03,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x03,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_add_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x25,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_and_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_ashrrev_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: 
v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +0x05,0x00,0x01,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] 
+0x05,0x00,0x01,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 + +# GFX11: v_cndmask_b32_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x01,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x01,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 + +# GFX11: v_cndmask_b32_e64_dpp v5, -v1, |v2|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x05,0x02,0x01,0xd5,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x0d,0x30] +0x05,0x02,0x01,0xd5,0xfa,0x04,0xf2,0x21,0x01,0x6f,0x0d,0x30 + +# GFX11: v_cndmask_b32_e64_dpp v5, |v1|, -v2, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x05,0x01,0x01,0xd5,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x0d,0x30] +0x05,0x01,0x01,0xd5,0xfa,0x04,0xf2,0x41,0x01,0x6f,0x0d,0x30 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, 
v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +0x05,0x01,0x2f,0xd5,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +0x05,0x02,0x2f,0xd5,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 
+0xff,0x83,0x2f,0xd5,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 
+0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x36,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x36,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x36,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x36,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x2b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x2b,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x2b,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x2b,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13] +0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13 + +# GFX11: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] +0xff,0x81,0x3b,0xd5,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30 + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_lshlrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x18,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x19,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x39,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x39,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x39,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x10,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x10,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_max_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x10,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x10,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_max_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x12,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_max_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x14,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x3a,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x3a,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x3a,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x0f,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_min_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x0f,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x0f,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_min_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x11,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 
+0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_min_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x13,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x07,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x07,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x07,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: 
v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x35,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x35,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x35,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x08,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x08,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x08,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp 
v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x0a,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 
+0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 
+0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x0c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x09,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x0b,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_or_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1c,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] 
+0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +0x05,0x0c,0x21,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +0x05,0x68,0x21,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff + +# W32: v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +0x05,0x6a,0x21,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 + +# W32: v_sub_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +0x05,0x7a,0x21,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 + +# GFX11: v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +0xff,0xfc,0x21,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x33,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x33,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x33,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x04,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x04,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x04,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x26,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +0x05,0x0c,0x22,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +0x05,0x68,0x22,0xd5,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff + +# W32: v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +0x05,0x6a,0x22,0xd5,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +0x05,0x7a,0x22,0xd5,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 + +# 
GFX11: v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] +0xff,0xfc,0x22,0xd5,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 
+0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x34,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x34,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x34,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01] +0x05,0x01,0x05,0xd5,0xfa,0x04,0x02,0x48,0x01,0x5f,0x01,0x01 + +# GFX11: v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13] +0x05,0x02,0x05,0xd5,0xfa,0x04,0x02,0x30,0x01,0x60,0x01,0x13 + +# GFX11: v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30] +0xff,0x83,0x05,0xd5,0xfa,0xfe,0x03,0x78,0xff,0x6f,0x0d,0x30 + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x80,0x27,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: 
v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_xnor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1e,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 + +# GFX11: v_xor_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +0xff,0x00,0x1d,0xd5,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt 
b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index 525940b74f73e..3f4f44a479fd5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -1,5 +1,5 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s # GFX11: v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -37,25 +37,6 @@ # GFX11: v_add3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x55,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x55,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 - -# W32: v_add_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 - -# W32: v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 - -# W32: v_add_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 - -# GFX11: v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x20,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] -0xff,0xfc,0x20,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 - # W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x0c,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -75,30 +56,6 @@ # GFX11: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x00,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xfc,0x00,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x02,0x32,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x32,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x32,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x32,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x03,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x03,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x03,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x03,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -141,12 +98,6 @@ # GFX11: v_add_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x26,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x26,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 
-0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -219,12 +170,6 @@ # GFX11: v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x62,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -267,12 +212,6 @@ # GFX11: v_ashrrev_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x3a,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_bcnt_u32_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1e,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -393,48 +332,6 @@ # GFX11: 
v_bfm_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1d,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] 
-0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa2,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa2,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cls_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_clz_i32_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - # W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] # W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x5d,0xd6,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 @@ -460,61 +357,6 @@ # GFX11: v_cndmask_b16_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] 0xff,0x00,0x5d,0xd6,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] 
-0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 - -# W32: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 - -# GFX11: v_cndmask_b32_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x01,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] -0xff,0x00,0x01,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 - -# GFX11: v_cndmask_b32_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x05,0x02,0x01,0xd5,0xea,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] -0x05,0x02,0x01,0xd5,0xea,0x04,0xf2,0x21,0x01,0x00,0x00,0x00 - -# GFX11: v_cndmask_b32_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x05,0x01,0x01,0xd5,0xea,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] -0x05,0x01,0x01,0xd5,0xea,0x04,0xf2,0x41,0x01,0x00,0x00,0x00 - -# GFX11: v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: 
v_cos_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xe1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xe1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_cos_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cos_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xb6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -659,180 +501,6 @@ # GFX11: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0xff,0x87,0x0e,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0xd1,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0xd1,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: 
v_cvt_f16_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 
-0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x85,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x85,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x86,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x86,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x91,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x91,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x92,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x92,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x93,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x93,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
-0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x94,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x94,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_floor_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0x8d,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0x8d,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_i16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x81,0xd3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_i32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x88,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x81,0x88,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_i32_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0x8c,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0x8c,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_norm_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xe3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0xe3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_norm_u16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xe4,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0xe4,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 
-0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x8e,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] -0xff,0x80,0x8e,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 - # GFX11: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x06,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -851,18 +519,6 @@ # GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x24,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] -0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2f,0xd5,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -0x05,0x02,0x2f,0xd5,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -0xff,0x83,0x2f,0xd5,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 - # GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -941,72 +597,6 @@ # GFX11: v_cvt_pknorm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0xff,0x03,0x22,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_u16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd2,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x81,0xd2,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f16_e64_dpp v5, v1 mul:4 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd8,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd8,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_exp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 
-0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_floor_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -1043,78 +633,6 @@ # GFX11: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0xff,0x87,0x13,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 -# GFX11: v_fract_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f16_e64_dpp v255, -|v255| clamp 
div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_fract_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_exp_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xda,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0xda,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_exp_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xbf,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] -0xff,0x01,0xbf,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
-0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 
mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_ldexp_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -1163,30 +681,6 @@ # GFX11: v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x15,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x15,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_log_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_log_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 
-0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -1265,24 +759,12 @@ # GFX11: v_lshlrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x38,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_lshrrev_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x39,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x39,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -1463,54 +945,18 @@ # GFX11: v_max3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x1e,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x39,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x39,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x39,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x02,0x10,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x10,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x10,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x10,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_max_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_max_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0a,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_max_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x09,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x09,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: 
v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -1883,54 +1329,18 @@ # GFX11: v_min3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x1b,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x3a,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x3a,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x3a,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x0f,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x0f,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x83,0x0f,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x0f,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_min_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_min_i16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0c,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_min_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0b,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0b,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2075,36 +1485,6 @@ # GFX11: v_minmax_u32_e64_dpp 
v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x63,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
-0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2141,72 +1521,12 @@ # GFX11: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x39,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x80,0x39,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x07,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x07,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x07,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x07,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 
- -# GFX11: v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x35,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x35,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x35,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x35,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x08,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x08,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x08,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x08,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_mul_lo_u16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x05,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x05,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2243,18 +1563,6 @@ # GFX11: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0xff,0x87,0x18,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 -# GFX11: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
-0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_not_b16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_not_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2297,12 +1605,6 @@ # GFX11: v_or_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x63,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2339,90 +1641,6 @@ # GFX11: v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x44,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x44,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_rcp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rcp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xaa,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xaa,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: 
v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xab,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xab,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rndne_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xde,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xde,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rndne_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x81,0xa3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rsq_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_rsq_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2567,79 +1785,6 @@ # GFX11: 
v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x80,0x22,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xe0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xe0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] 
-0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_sin_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xb5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xd5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 - -# W32: v_sub_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 - -# W32: v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 - -# W32: v_sub_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x21,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] -0xff,0xfc,0x21,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 - # W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -2659,61 +1804,12 @@ # GFX11: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x01,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xfc,0x01,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x33,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x33,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x33,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x33,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x04,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x04,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x04,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x04,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: 
v_sub_nc_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x25,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # GFX11: v_sub_nc_i32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x25,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x25,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] -0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -# W64: v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] -0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -# W64: v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 - -# W32: v_subrev_co_ci_u32_e64_dpp v5, 
ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -# W64: v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] -0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x22,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] -0xff,0xfc,0x22,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 - # W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x0c,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -2733,60 +1829,6 @@ # GFX11: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0xfc,0x02,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x34,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x34,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x34,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] 
-0xff,0x83,0x34,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x05,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x05,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x05,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x05,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - -# GFX11: v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f16_e64_dpp v255, -|v255| clamp 
div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdd,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xdd,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] -0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] -0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 - -# GFX11: v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] -0xff,0x81,0xa1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 - # GFX11: v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2823,12 +1865,6 @@ # GFX11: v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x45,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0xff,0x00,0x45,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 @@ -2871,12 +1907,6 @@ # GFX11: v_xor_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x64,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -# GFX11: v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 - # GFX11: v_add_nc_i16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0d,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -3015,30 +2045,6 @@ # GFX11: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0xff,0xc7,0x48,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] 
-0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - -# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] -0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] -0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 - -# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] -0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 - # GFX11: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt new file mode 100644 index 0000000000000..4ea57003eeeb9 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt @@ -0,0 +1,601 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s + +# GFX11: v_bfrev_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_bfrev_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xb8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] 
; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xdc,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xdc,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa2,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_ceil_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa2,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa2,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xbb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cls_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xbb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: 
v_clz_i32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_clz_i32_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xb9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xe1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xe1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xe1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xb6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cos_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 
+0xff,0x81,0xb6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xba,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ctz_i32_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xba,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x8a,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_i16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_i16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; 
encoding: [0xff,0x80,0xd1,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0xd1,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_u16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f16_u16_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0xd0,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x8b,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# 
GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_i32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x85,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x85,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_u32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x86,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_u32_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x86,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x86,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x91,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte0_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x91,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x91,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x92,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte1_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x92,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x92,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x93,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte2_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x93,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] 
+0xff,0x80,0x93,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x94,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_f32_ubyte3_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x94,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x94,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8d,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_floor_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0x8d,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0x8d,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_i16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x81,0xd3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x88,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_i32_f32_e64_dpp v255, -|v255| clamp 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x88,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x81,0x88,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xea,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_i32_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xea,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x8c,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_nearest_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0x8c,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0x8c,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_norm_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xe3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0xe3,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_norm_u16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xe4,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0xe4,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x8e,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_off_f32_i4_e64_dpp v255, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x8e,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00] +0xff,0x80,0x8e,0xd5,0xea,0x00,0x00,0x18,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_u16_f16_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd2,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x81,0xd2,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x87,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_u32_f32_e64_dpp v255, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x81,0x87,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xeb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_u32_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xeb,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] 
; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd8,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd8,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd8,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_exp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: 
v_floor_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xdb,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xdb,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_floor_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xdf,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] 
+0xff,0x81,0xdf,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_fract_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xda,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_exp_i16_f16_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xda,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0xda,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xbf,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_exp_i32_f32_e64_dpp v255, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x01,0xbf,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00] +0xff,0x01,0xbf,0xd5,0xea,0x00,0x00,0x20,0xff,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd9,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd9,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xc0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_frexp_mant_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xc0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] 
+0x05,0x00,0xd7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa7,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_log_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa7,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x81,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mov_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x81,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_movreld_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xc2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_movrels_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_movrels_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xc3,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_movrelsd_2_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc8,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_movrelsd_2_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xc8,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_movrelsd_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xc4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_movrelsd_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xc4,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe9,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_not_b16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xe9,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb7,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_not_b32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xb7,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd4,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd4,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xaa,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xaa,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xaa,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] 
+0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_iflag_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xab,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rcp_iflag_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xab,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xab,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xde,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xde,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xde,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rndne_f32_e64_dpp v255, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd6,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd6,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xae,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_rsq_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xae,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe2,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + 
+# GFX11: v_sat_pk_u8_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0xe2,0xd5,0xea,0x00,0x00,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xe0,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xe0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xe0,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xb5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_sin_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xb5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] 
+0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xd5,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xd5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xd5,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xb3,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_sqrt_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xb3,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f16_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xdd,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xdd,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xdd,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f32_e64_dpp v5, v1 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0xa1,0xd5,0xe9,0x00,0x00,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_trunc_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0xa1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0xa1,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop2.txt new file mode 100644 index 0000000000000..f0165884ba71f --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop2.txt @@ -0,0 +1,396 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s + +# W32: v_add_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64: v_add_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +0x05,0x0c,0x20,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 + +# W32: v_add_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64: v_add_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +0x05,0x68,0x20,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 + +# W32: v_add_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64: v_add_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x6a,0x20,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# W32: v_add_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64: v_add_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +0x05,0x7a,0x20,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 + +# GFX11: v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x20,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +0xff,0xfc,0x20,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 + +# GFX11: v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x02,0x32,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x32,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x32,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x32,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x03,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x03,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x03,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x03,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 
+0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# W32: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 + +# GFX11: v_cndmask_b32_e64_dpp v255, v255, v255, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0xff,0x00,0x01,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +0xff,0x00,0x01,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 + +# GFX11: v_cndmask_b32_e64_dpp v5, -v1, |v2|, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x05,0x02,0x01,0xd5,0xea,0x04,0xf2,0x21,0x01,0x00,0x00,0x00] +0x05,0x02,0x01,0xd5,0xea,0x04,0xf2,0x21,0x01,0x00,0x00,0x00 + +# GFX11: v_cndmask_b32_e64_dpp v5, |v1|, -v2, null dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x05,0x01,0x01,0xd5,0xea,0x04,0xf2,0x41,0x01,0x00,0x00,0x00] +0x05,0x01,0x01,0xd5,0xea,0x04,0xf2,0x41,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2f,0xd5,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +0x05,0x02,0x2f,0xd5,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2f,0xd5,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +0xff,0x83,0x2f,0xd5,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 + +# GFX11: v_fmac_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x36,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x36,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] 
+0x05,0x02,0x36,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x36,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_fmac_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x2b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x2b,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x2b,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_fmac_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x2b,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05 + +# GFX11: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05 + +# GFX11: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] +0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 + +# GFX11: 
v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_max_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x39,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x39,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x39,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x39,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x39,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_max_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x10,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x10,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x10,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x10,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_max_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x10,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x10,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_min_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x3a,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x3a,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x3a,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# 
GFX11: v_min_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x3a,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x3a,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_min_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x0f,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x0f,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x0f,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_min_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x0f,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x0f,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x07,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x07,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x07,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x07,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x35,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x35,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x35,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x35,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 
+0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x08,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x08,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x08,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x08,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: 
v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32: v_sub_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +0x05,0x0c,0x21,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 + +# W32: v_sub_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64: v_sub_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +0x05,0x68,0x21,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 + +# W32: v_sub_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64: v_sub_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x6a,0x21,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# W32: v_sub_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64: v_sub_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +0x05,0x7a,0x21,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x21,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +0xff,0xfc,0x21,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 + +# GFX11: v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x33,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x33,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x33,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x33,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x04,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x04,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x04,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] 
+0xff,0x83,0x04,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s12, v1, v2, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[12:13], v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] +0x05,0x0c,0x22,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, s104, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +# W64: v_subrev_co_ci_u32_e64_dpp v5, s[104:105], v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] +0x05,0x68,0x22,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64: v_subrev_co_ci_u32_e64_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +0x05,0x6a,0x22,0xd5,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 + +# W32: v_subrev_co_ci_u32_e64_dpp v5, ttmp14, v1, v2, ttmp14 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] +# W64: v_subrev_co_ci_u32_e64_dpp v5, ttmp[14:15], v1, v2, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05] 
+0x05,0x7a,0x22,0xd5,0xe9,0x04,0xea,0x01,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xfc,0x22,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00] +0xff,0xfc,0x22,0xd5,0xea,0xfe,0xf3,0x01,0xff,0x00,0x00,0x00 + +# GFX11: v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f16_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x34,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x34,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x34,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] +0xff,0x83,0x34,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] +0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f32_e64_dpp v5, -v1, |v2| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x05,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05] +0x05,0x02,0x05,0xd5,0xe9,0x04,0x02,0x30,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x83,0x05,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00] 
+0xff,0x83,0x05,0xd5,0xea,0xfe,0x03,0x78,0xff,0x00,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 + +# GFX11: v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 + +# GFX11: v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] +0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt new file mode 100644 index 0000000000000..cba7fa924be2c --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt @@ -0,0 +1,3283 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11 %s + +# GFX11: v_bfrev_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: 
v_bfrev_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, null ; encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_bfrev_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_ceil_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00] 
+0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, null ; encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_ceil_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_ceil_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, v255 ; encoding: 
[0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, null ; encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_ceil_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_ceil_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_ceil_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00] 
+0x05,0x00,0x98,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_ceil_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_ceil_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cls_i32_e64 v5, v1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, v255 ; encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, s1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00] 
+0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, s105 ; encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, m0 ; encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, null ; encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, -1 ; encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v5, src_scc ; encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cls_i32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_clz_i32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, s1 ; encoding: 
[0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, null ; encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_clz_i32_u32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cos_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00] 
+0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, null ; encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cos_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cos_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, v255 ; encoding: 
[0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, null ; encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cos_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cos_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_ctz_i32_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: 
v_ctz_i32_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, null ; encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xba,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_ctz_i32_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xba,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f16_f32_e64 v5, v1 ; encoding: 
[0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8a,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, null ; encoding: [0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f16_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x8a,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] 
+0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f16_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd1,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, null ; encoding: [0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd1,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_i16_e64 v5, 0x3800 mul:2 +0x05,0x00,0xd1,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f16_i16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd1,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f16_i16_e64 v255, 0xfe0b clamp div:2 ; encoding: 
[0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] +0xff,0x80,0xd1,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, v255 ; encoding: [0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd0,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, s105 ; encoding: [0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, m0 ; encoding: [0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, null ; encoding: [0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, -1 ; encoding: [0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd0,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f16_u16_e64 v5, 0x3800 mul:2 +0x05,0x00,0xd0,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f16_u16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd0,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f16_u16_e64 
v255, 0xfe0b clamp div:2 ; encoding: [0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00] +0xff,0x80,0xd0,0xd5,0xff,0x00,0x00,0x18,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, v1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, s1 ; encoding: [0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, s105 ; encoding: [0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, m0 ; encoding: [0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, null ; encoding: [0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, -1 ; encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_f16_e64 v5, src_scc mul:4 ; encoding: 
[0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x8b,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0x8b,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x8f,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, null ; encoding: [0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8f,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_f64_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x8f,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_f64_e64 v5, -|src_scc| mul:4 ; encoding: [0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x8f,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_cvt_f32_f64_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x8f,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: 
v_cvt_f32_i32_e64 v5, v1 ; encoding: [0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x85,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, v255 ; encoding: [0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x85,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, s1 ; encoding: [0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, s105 ; encoding: [0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, m0 ; encoding: [0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, null ; encoding: [0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, -1 ; encoding: [0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x85,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_i32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x85,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_i32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x85,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_i32_e64 v255, 0xaf123456 clamp div:2 ; encoding: 
[0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x85,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x86,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x86,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, s1 ; encoding: [0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, null ; encoding: [0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x86,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_u32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x86,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_u32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10] 
+0x05,0x00,0x86,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_u32_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x86,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte0_e64 v5, v1 ; encoding: [0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x91,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, v255 ; encoding: [0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x91,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, s1 ; encoding: [0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, s105 ; encoding: [0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, m0 ; encoding: [0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, exec_lo ; encoding: [0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, exec_hi ; encoding: [0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, null ; encoding: [0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, -1 ; encoding: [0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x91,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08] 
+0x05,0x00,0x91,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_ubyte0_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x91,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_ubyte0_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x91,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte1_e64 v5, v1 ; encoding: [0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x92,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, v255 ; encoding: [0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x92,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, s1 ; encoding: [0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, s105 ; encoding: [0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, m0 ; encoding: [0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, exec_lo ; encoding: [0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, exec_hi ; encoding: [0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, null ; encoding: [0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x92,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, -1 ; encoding: [0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00] 
+0x05,0x00,0x92,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x92,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_ubyte1_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x92,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_ubyte1_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x92,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte2_e64 v5, v1 ; encoding: [0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x93,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, v255 ; encoding: [0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x93,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, s1 ; encoding: [0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, s105 ; encoding: [0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, m0 ; encoding: [0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, exec_lo ; encoding: [0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, exec_hi ; encoding: [0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, null ; encoding: [0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00] 
+0x05,0x00,0x93,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, -1 ; encoding: [0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x93,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x93,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_ubyte2_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x93,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_ubyte2_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x93,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f32_ubyte3_e64 v5, v1 ; encoding: [0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x94,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, v255 ; encoding: [0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x94,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, s1 ; encoding: [0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, s105 ; encoding: [0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, m0 ; encoding: [0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, exec_lo ; encoding: [0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, exec_hi ; encoding: [0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00] 
+0x05,0x00,0x94,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, null ; encoding: [0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, -1 ; encoding: [0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x94,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x94,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f32_ubyte3_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x94,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f32_ubyte3_e64 v255, 0xaf123456 clamp div:2 ; encoding: [0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xff,0x80,0x94,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f64_f32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x90,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x90,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], exec_lo ; encoding: 
[0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], null ; encoding: [0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x90,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x90,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f64_f32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x90,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f64_f32_e64 v[254:255], -|0xaf123456| clamp div:2 ; encoding: [0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xfe,0x81,0x90,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f64_i32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x84,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x84,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], ttmp15 ; encoding: [0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], m0 
; encoding: [0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], null ; encoding: [0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x84,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x84,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f64_i32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x84,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f64_i32_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x84,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_f64_u32_e64 v[5:6], v1 ; encoding: [0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x96,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], v255 ; encoding: [0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x96,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], s1 ; encoding: [0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], s105 ; encoding: [0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], vcc_lo ; encoding: [0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], vcc_hi ; encoding: [0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 
v[5:6], ttmp15 ; encoding: [0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], m0 ; encoding: [0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], exec_lo ; encoding: [0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], exec_hi ; encoding: [0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], null ; encoding: [0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x96,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x96,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_f64_u32_e64 v[5:6], src_scc mul:4 ; encoding: [0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x96,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_f64_u32_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x96,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_floor_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8d,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x6a,0x00,0x00,0x00 + +# 
GFX11: v_cvt_floor_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x8d,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_floor_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +0xff,0x01,0x8d,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: 
v_cvt_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xd3,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_i16_f16_e64 v255, -|0xfe0b| clamp ; encoding: [0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x88,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x88,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, s105 ; encoding: 
[0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x88,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f32_e64 v255, -|0xaf123456| clamp ; encoding: [0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +0xff,0x81,0x88,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x83,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x83,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00] 
+0x05,0x00,0x83,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, null ; encoding: [0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x83,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20] +0x05,0x01,0x83,0xd5,0xfd,0x00,0x00,0x20 + +# GFX11: v_cvt_i32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x83,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_i32_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xea,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xea,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: 
v_cvt_i32_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, null ; encoding: [0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, 0x3800 +0x05,0x00,0xea,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xea,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_i32_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0xea,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8c,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00] 
+0x05,0x00,0x8c,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x8c,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_nearest_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +0xff,0x01,0x8c,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_norm_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, s105 ; encoding: 
[0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xe3,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_i16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x01,0xe3,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe4,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, s1 ; encoding: 
[0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, null ; encoding: [0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xe4,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_norm_u16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x01,0xe4,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, v1 ; encoding: [0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, v255 ; encoding: 
[0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x8e,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, s1 ; encoding: [0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, s105 ; encoding: [0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, m0 ; encoding: [0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, exec_lo ; encoding: [0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, exec_hi ; encoding: [0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, null ; encoding: [0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, -1 ; encoding: [0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x8e,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_off_f32_i4_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x8e,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_cvt_off_f32_i4_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0x8e,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_cvt_off_f32_i4_e64 v255, 0x4f clamp div:2 ; encoding: [0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00] +0xff,0x80,0x8e,0xd5,0xff,0x00,0x00,0x18,0x4f,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, v1 ; encoding: 
[0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd2,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, null ; encoding: [0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xd2,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_u16_f16_e64 v255, -|0xfe0b| clamp ; encoding: [0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] 
+0xff,0x81,0xd2,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x87,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x87,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, null ; encoding: [0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x87,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f32_e64 v255, -|0xaf123456| clamp 
; encoding: [0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] +0xff,0x81,0x87,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_u32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x95,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x95,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, exec ; encoding: [0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, null ; encoding: [0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x95,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20] +0x05,0x01,0x95,0xd5,0xfd,0x00,0x00,0x20 + +# GFX11: v_cvt_u32_f64_e64 v255, 0xaf123456 clamp ; encoding: [0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x95,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_u32_u16_e64 v5, v1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, v255 ; 
encoding: [0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xeb,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, s1 ; encoding: [0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, s105 ; encoding: [0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, m0 ; encoding: [0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, null ; encoding: [0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, -1 ; encoding: [0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, 0x3800 +0x05,0x00,0xeb,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v5, src_scc ; encoding: [0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xeb,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_cvt_u32_u16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0xeb,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, v255 
; encoding: [0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd8,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, null ; encoding: [0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd8,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_exp_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd8,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_exp_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd8,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_exp_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd8,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x01,0x01,0x00,0x00 + +# 
GFX11: v_exp_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa5,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, null ; encoding: [0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa5,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_exp_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa5,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_exp_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa5,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_exp_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_floor_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00] 
+0x05,0x00,0xdb,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xdb,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, null ; encoding: [0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xdb,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_floor_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xdb,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_floor_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xdb,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_floor_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xdb,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, 
v1 ; encoding: [0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa4,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, null ; encoding: [0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa4,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_floor_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa4,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_floor_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa4,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_floor_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] 
+0xff,0x81,0xa4,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_floor_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x9a,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x9a,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_floor_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x9a,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_floor_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x9a,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_floor_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x9a,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_fract_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, v255 ; encoding: 
[0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xdf,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, null ; encoding: [0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xdf,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_fract_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xdf,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_fract_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xdf,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_fract_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xdf,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00] 
+0x05,0x00,0xa0,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa0,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, null ; encoding: [0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa0,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_fract_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa0,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_fract_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa0,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_fract_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_fract_f64_e64 
v[5:6], v[1:2] ; encoding: [0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xbe,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbe,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_fract_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xbe,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_fract_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xbe,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_fract_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xbe,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_exp_i16_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xda,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xda,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: 
v_frexp_exp_i16_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, null ; encoding: [0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v5, src_scc ; encoding: [0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xda,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i16_f16_e64 v255, -|0xfe0b| ; encoding: [0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00] +0xff,0x01,0xda,0xd5,0xff,0x00,0x00,0x20,0x0b,0xfe,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00] 
+0x05,0x00,0xbf,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xbf,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, null ; encoding: [0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v5, src_scc ; encoding: [0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xbf,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f32_e64 v255, -|0xaf123456| ; encoding: [0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf] 
+0xff,0x01,0xbf,0xd5,0xff,0x00,0x00,0x20,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_exp_i32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, v[254:255] ; encoding: [0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xbc,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, s[2:3] ; encoding: [0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, s[104:105] ; encoding: [0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, vcc ; encoding: [0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, ttmp[14:15] ; encoding: [0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, exec ; encoding: [0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, null ; encoding: [0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, -1 ; encoding: [0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xbc,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_frexp_exp_i32_f64_e64 v5, -|src_scc| ; encoding: [0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20] +0x05,0x01,0xbc,0xd5,0xfd,0x00,0x00,0x20 + +# GFX11: v_frexp_exp_i32_f64_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xbc,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_mant_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, v255 ; 
encoding: [0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd9,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, null ; encoding: [0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd9,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd9,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_frexp_mant_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd9,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_frexp_mant_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd9,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, v1 ; 
encoding: [0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xc0,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, null ; encoding: [0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xc0,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xc0,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_frexp_mant_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xc0,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_frexp_mant_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: 
[0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xc0,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_frexp_mant_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xbd,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xbd,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xbd,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_frexp_mant_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xbd,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_frexp_mant_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xbd,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_log_f16_e64 v5, v1 ; encoding: 
[0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd7,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, null ; encoding: [0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd7,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_log_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd7,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_log_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd7,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_log_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd7,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: 
v_log_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa7,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, null ; encoding: [0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa7,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_log_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa7,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_log_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa7,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_log_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] 
+0xff,0x81,0xa7,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_mov_b32_e64 v5, v1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x81,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, v255 ; encoding: [0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0x81,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, s1 ; encoding: [0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, s105 ; encoding: [0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, m0 ; encoding: [0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, null ; encoding: [0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, -1 ; encoding: [0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0x81,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_mov_b32_e64 v255, 0xaf123456 ; encoding: 
[0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x81,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_movreld_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xc2,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, null ; encoding: [0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xc2,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00] 
+0x05,0x00,0xc2,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_movreld_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xc2,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_movrels_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_movrels_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00] +0xff,0x00,0xc3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_movrelsd_2_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc8,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_movrelsd_2_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00] +0xff,0x00,0xc8,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_movrelsd_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xc4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_movrelsd_b32_e64 v255, v255 ; encoding: [0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00] +0xff,0x00,0xc4,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_nop ; encoding: [0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00] +0x00,0x00,0x80,0xd5,0x00,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, v1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, v255 ; encoding: [0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe9,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, s1 ; encoding: [0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, s105 ; encoding: [0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, ttmp15 ; encoding: 
[0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, m0 ; encoding: [0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, null ; encoding: [0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, -1 ; encoding: [0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, 0x3800 +0x05,0x00,0xe9,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xe9,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_not_b16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0xe9,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb7,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00] 
+0x05,0x00,0xb7,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, null ; encoding: [0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xb7,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_not_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0xb7,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_pipeflush ; encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00] +0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd4,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00] 
+0x05,0x00,0xd4,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, null ; encoding: [0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd4,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rcp_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd4,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rcp_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd4,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rcp_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd4,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xaa,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, vcc_hi ; encoding: 
[0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, null ; encoding: [0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xaa,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rcp_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xaa,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rcp_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xaa,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rcp_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xaa,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_rcp_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xaf,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00] 
+0x05,0x00,0xaf,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xaf,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rcp_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xaf,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rcp_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xaf,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_rcp_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xaf,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_rcp_iflag_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xab,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xab,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00] 
+0x05,0x00,0xab,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, null ; encoding: [0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xab,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rcp_iflag_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xab,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rcp_iflag_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xab,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rcp_iflag_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xab,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_rndne_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xde,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xde,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x6b,0x00,0x00,0x00 
+ +# GFX11: v_rndne_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, null ; encoding: [0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xde,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rndne_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xde,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rndne_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xde,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rndne_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xde,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, vcc_hi ; encoding: 
[0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, null ; encoding: [0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa3,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rndne_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa3,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rndne_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa3,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rndne_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_rndne_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x99,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x99,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], vcc ; encoding: 
[0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x99,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rndne_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x99,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rndne_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x99,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_rndne_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x99,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_rsq_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd6,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00] 
+0x05,0x00,0xd6,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, null ; encoding: [0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd6,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rsq_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd6,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rsq_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd6,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rsq_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd6,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xae,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xae,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, ttmp15 ; encoding: 
[0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, null ; encoding: [0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xae,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rsq_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xae,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rsq_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xae,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_rsq_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xae,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_rsq_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xb1,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00] 
+0x05,0x00,0xb1,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb1,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_rsq_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb1,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_rsq_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xb1,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_rsq_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xb1,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_sat_pk_u8_i16_e64 v5, v1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, v255 ; encoding: [0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe2,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, s1 ; encoding: [0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, s105 ; encoding: [0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, m0 ; encoding: [0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00] 
+0x05,0x00,0xe2,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, null ; encoding: [0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, -1 ; encoding: [0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, 0.5 ; encoding: [0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0xf0,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v5, src_scc ; encoding: [0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00] +0x05,0x00,0xe2,0xd5,0xfd,0x00,0x00,0x00 + +# GFX11: v_sat_pk_u8_i16_e64 v255, 0xfe0b ; encoding: [0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00] +0xff,0x00,0xe2,0xd5,0xff,0x00,0x00,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xe0,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, m0 ; encoding: 
[0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, null ; encoding: [0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xe0,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sin_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xe0,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sin_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xe0,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_sin_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xe0,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb5,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: 
v_sin_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, null ; encoding: [0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb5,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sin_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb5,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sin_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xb5,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_sin_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xb5,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_sqrt_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xd5,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00] 
+0x05,0x00,0xd5,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, null ; encoding: [0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xd5,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sqrt_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xd5,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sqrt_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xd5,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_sqrt_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xd5,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xb3,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, ttmp15 ; encoding: 
[0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, null ; encoding: [0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb3,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sqrt_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb3,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sqrt_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xb3,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_sqrt_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xb3,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_sqrt_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0xb4,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], ttmp[14:15] ; encoding: 
[0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], exec ; encoding: [0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xb4,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_sqrt_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xb4,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_sqrt_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0xb4,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_sqrt_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0xb4,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf + +# GFX11: v_trunc_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xdd,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00] 
+0x05,0x00,0xdd,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, null ; encoding: [0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xdd,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_trunc_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xdd,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_trunc_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xdd,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_trunc_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0xdd,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00] +0x05,0x00,0xa1,0xd5,0xff,0x01,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x01,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x69,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x6b,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7b,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, 
m0 ; encoding: [0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7d,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7f,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, null ; encoding: [0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0xa1,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_trunc_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0xa1,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_trunc_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10] +0x05,0x00,0xa1,0xd5,0xfd,0x00,0x00,0x10 + +# GFX11: v_trunc_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf] +0xff,0x81,0xa1,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf + +# GFX11: v_trunc_f64_e64 v[5:6], v[1:2] ; encoding: [0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00] +0x05,0x00,0x97,0xd5,0x01,0x01,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], v[254:255] ; encoding: [0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00] +0x05,0x00,0x97,0xd5,0xfe,0x01,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], s[2:3] ; encoding: [0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x02,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], s[104:105] ; encoding: [0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x68,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], vcc ; encoding: [0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x6a,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], ttmp[14:15] ; encoding: [0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x7a,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], exec ; encoding: 
[0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x7e,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], null ; encoding: [0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0x7c,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], -1 ; encoding: [0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00] +0x05,0x00,0x97,0xd5,0xc1,0x00,0x00,0x00 + +# GFX11: v_trunc_f64_e64 v[5:6], 0.5 mul:2 ; encoding: [0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08] +0x05,0x00,0x97,0xd5,0xf0,0x00,0x00,0x08 + +# GFX11: v_trunc_f64_e64 v[5:6], -|src_scc| mul:4 ; encoding: [0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30] +0x05,0x01,0x97,0xd5,0xfd,0x00,0x00,0x30 + +# GFX11: v_trunc_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf] +0xfe,0x80,0x97,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt new file mode 100644 index 0000000000000..ae577541bb205 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop2.txt @@ -0,0 +1,1842 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s + +# W32: v_add_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +0x05,0x0c,0x20,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf + +# W32: v_add_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00] 
+0x05,0x0c,0x20,0xd5,0xff,0xfb,0x19,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00] +0x05,0x0c,0x20,0xd5,0x69,0xd2,0x18,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00] +0x05,0x0c,0x20,0xd5,0x6a,0x04,0x1a,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00] +0x05,0x0c,0x20,0xd5,0x6b,0xfe,0x1b,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00] +0x05,0x0c,0x20,0xd5,0x7b,0xf6,0x18,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00] +0x05,0x0c,0x20,0xd5,0x7d,0xe0,0x19,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00] +0x05,0x0c,0x20,0xd5,0x7e,0xfc,0x18,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00] +0x05,0x0c,0x20,0xd5,0x7f,0x82,0x19,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: 
[0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] +# W64: v_add_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00] +0x05,0x0c,0x20,0xd5,0x7c,0xfe,0x18,0x00 + +# W32: v_add_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] +# W64: v_add_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01] +0x05,0x68,0x20,0xd5,0xc1,0xfa,0xa0,0x01 + +# W32: v_add_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] +# W64: v_add_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01] +0x05,0x6a,0x20,0xd5,0xf0,0xd4,0xa8,0x01 + +# W32: v_add_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] +# W64: v_add_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01] +0x05,0x7a,0x20,0xd5,0xfd,0xf8,0xe8,0x01 + +# GFX11: v_add_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0xfc,0x20,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf + +# GFX11: v_add_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x32,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_add_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x32,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_add_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x32,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x32,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x32,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: 
[0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x32,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x32,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_add_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x32,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_add_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x32,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_add_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x32,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x32,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x32,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_add_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x32,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_add_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x32,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_add_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x32,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x03,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_add_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x03,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_add_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x03,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x03,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: 
v_add_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x03,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x03,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_add_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x03,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_add_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x03,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_add_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x03,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_add_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x03,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x03,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x03,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_add_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x03,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_add_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x03,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_add_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x03,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_add_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x25,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_add_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x25,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_add_nc_u32_e64 v5, s1, s2 ; encoding: 
[0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x25,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x25,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x25,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x25,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_add_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x25,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_add_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x25,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_add_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x25,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_add_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x25,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x25,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x25,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x25,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x25,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_add_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x25,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_and_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00] 
+0x05,0x00,0x1b,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_and_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1b,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_and_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_and_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1b,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_and_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1b,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_and_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1b,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_and_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1b,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1b,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1b,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_and_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1b,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_and_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 
+0xff,0x00,0x1b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_ashrrev_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1a,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1a,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_ashrrev_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1a,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1a,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1a,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1a,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1a,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1a,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v5, src_scc, vcc_lo ; encoding: 
[0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1a,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_ashrrev_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# W32: v_cndmask_b32_e64 v5, v1, 0xaf123456, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +# W64: v_cndmask_b32_e64 v5, v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x01,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf + +# W32: v_cndmask_b32_e64 v5, v255, src_scc, s6 ; encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] +# W64: v_cndmask_b32_e64 v5, v255, src_scc, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00] +0x05,0x00,0x01,0xd5,0xff,0xfb,0x19,0x00 + +# W32: v_cndmask_b32_e64 v5, s105, s105, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] +# W64: v_cndmask_b32_e64 v5, s105, s105, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00] +0x05,0x00,0x01,0xd5,0x69,0xd2,0x18,0x00 + +# W32: v_cndmask_b32_e64 v5, vcc_lo, v2, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] +# W64: v_cndmask_b32_e64 v5, vcc_lo, v2, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00] +0x05,0x00,0x01,0xd5,0x6a,0x04,0x1a,0x00 + +# W32: v_cndmask_b32_e64 v5, vcc_hi, v255, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] +# W64: v_cndmask_b32_e64 v5, vcc_hi, v255, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00] +0x05,0x00,0x01,0xd5,0x6b,0xfe,0x1b,0x00 + +# W32: v_cndmask_b32_e64 v5, ttmp15, ttmp15, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] +# W64: v_cndmask_b32_e64 v5, ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00] +0x05,0x00,0x01,0xd5,0x7b,0xf6,0x18,0x00 + +# W32: v_cndmask_b32_e64 v5, m0, 0.5, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] +# W64: v_cndmask_b32_e64 v5, m0, 0.5, s[6:7] ; encoding: 
[0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00] +0x05,0x00,0x01,0xd5,0x7d,0xe0,0x19,0x00 + +# W32: v_cndmask_b32_e64 v5, exec_lo, exec_lo, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] +# W64: v_cndmask_b32_e64 v5, exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00] +0x05,0x00,0x01,0xd5,0x7e,0xfc,0x18,0x00 + +# W32: v_cndmask_b32_e64 v5, exec_hi, -1, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] +# W64: v_cndmask_b32_e64 v5, exec_hi, -1, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00] +0x05,0x00,0x01,0xd5,0x7f,0x82,0x19,0x00 + +# W32: v_cndmask_b32_e64 v5, null, exec_hi, s6 ; encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] +# W64: v_cndmask_b32_e64 v5, null, exec_hi, s[6:7] ; encoding: [0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00] +0x05,0x00,0x01,0xd5,0x7c,0xfe,0x18,0x00 + +# W32: v_cndmask_b32_e64 v5, -1, m0, s104 ; encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] +# W64: v_cndmask_b32_e64 v5, -1, m0, s[104:105] ; encoding: [0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01] +0x05,0x00,0x01,0xd5,0xc1,0xfa,0xa0,0x01 + +# W32: v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc_lo ; encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] +# W64: v_cndmask_b32_e64 v5, 0.5, -|vcc_lo|, vcc ; encoding: [0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41] +0x05,0x02,0x01,0xd5,0xf0,0xd4,0xa8,0x41 + +# W32: v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp14 ; encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] +# W64: v_cndmask_b32_e64 v5, -|src_scc|, null, ttmp[14:15] ; encoding: [0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21] +0x05,0x01,0x01,0xd5,0xfd,0xf8,0xe8,0x21 + +# GFX11: v_cndmask_b32_e64 v255, -|0xaf123456|, -|vcc_hi|, null ; encoding: [0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf] +0xff,0x03,0x01,0xd5,0xff,0xd6,0xf0,0x61,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x2f,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, v255, v255 ; 
encoding: [0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x2f,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x2f,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x2f,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x2f,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x2f,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x2f,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x2f,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x2f,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x2f,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x2f,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40] +0x05,0x00,0x2f,0xd5,0xf0,0xfa,0x00,0x40 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20] +0x05,0x02,0x2f,0xd5,0xfd,0xd4,0x00,0x20 + +# GFX11: v_cvt_pk_rtz_f16_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp ; 
encoding: [0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] +0xff,0x83,0x2f,0xd5,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x06,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x06,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x06,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x06,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x06,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x06,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x06,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x06,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x06,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x06,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x06,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x06,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 ; 
encoding: [0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x06,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_fmac_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x06,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_fmac_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x06,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_fmac_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x36,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_fmac_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x36,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_fmac_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x36,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x36,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x36,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x36,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x36,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_fmac_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x36,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_fmac_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x36,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_fmac_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x36,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00] 
+0x05,0x00,0x36,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x36,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_fmac_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x36,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_fmac_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x36,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_fmac_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x36,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x2b,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_fmac_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x2b,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_fmac_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x2b,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x2b,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x2b,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x2b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_fmac_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x2b,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_fmac_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x2b,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_fmac_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x2b,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_fmac_f32_e64 v5, |exec_hi|, null 
; encoding: [0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x2b,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x2b,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x2b,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_fmac_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x2b,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_fmac_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x2b,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_fmac_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x2b,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_ldexp_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_ldexp_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x3b,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_ldexp_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x3b,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_ldexp_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00] 
+0x05,0x00,0x3b,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_ldexp_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x3b,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_ldexp_f16_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x3b,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x3b,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_ldexp_f16_e64 v5, 0.5, m0 mul:2 ; encoding: [0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08] +0x05,0x00,0x3b,0xd5,0xf0,0xfa,0x00,0x08 + +# GFX11: v_ldexp_f16_e64 v5, src_scc, vcc_lo mul:4 ; encoding: [0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10] +0x05,0x00,0x3b,0xd5,0xfd,0xd4,0x00,0x10 + +# GFX11: v_ldexp_f16_e64 v255, -|0xfe0b|, vcc_hi clamp div:2 ; encoding: [0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00] +0xff,0x81,0x3b,0xd5,0xff,0xd6,0x00,0x38,0x0b,0xfe,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x18,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x18,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x18,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x18,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x18,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x18,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: 
v_lshlrev_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x18,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x18,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x18,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x18,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x18,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x18,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x18,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x18,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_lshlrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x18,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_lshrrev_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x19,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x19,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x19,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x19,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00] 
+0x05,0x00,0x19,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x19,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_lshrrev_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x19,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x19,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x19,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x19,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x19,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x19,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x19,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x19,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_lshrrev_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x19,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x39,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_max_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x39,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_max_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x39,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, s105, s105 ; 
encoding: [0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x39,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x39,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x39,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x39,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_max_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x39,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_max_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x39,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_max_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x39,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x39,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x39,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_max_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x39,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_max_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x39,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_max_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x39,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x10,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_max_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x10,0xd5,0xff,0xff,0x03,0x00 
+ +# GFX11: v_max_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x10,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x10,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x10,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x10,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x10,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_max_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x10,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_max_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x10,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_max_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x10,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x10,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x10,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_max_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x10,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_max_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x10,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_max_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x10,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_max_i32_e64 v5, v1, v2 ; encoding: 
[0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x12,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_max_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x12,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_max_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x12,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x12,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x12,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x12,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x12,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_max_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x12,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_max_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x12,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_max_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x12,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x12,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x12,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x12,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_max_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x12,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_max_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: 
[0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x12,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x14,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_max_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x14,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_max_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x14,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x14,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x14,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x14,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_max_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x14,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_max_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x14,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_max_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x14,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_max_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x14,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x14,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x14,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x14,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_max_u32_e64 v5, src_scc, vcc_lo ; encoding: 
[0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x14,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_max_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x14,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_min_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x3a,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_min_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x3a,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_min_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x3a,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_min_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x3a,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_min_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x3a,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x3a,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x3a,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_min_f16_e64 v5, 0.5, -m0 mul:2 ; 
encoding: [0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x3a,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_min_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x3a,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_min_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x3a,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x0f,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_min_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x0f,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_min_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x0f,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0f,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0f,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0f,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0f,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_min_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x0f,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_min_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0f,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_min_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x0f,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0f,0xd5,0x7c,0xfc,0x00,0x00 + +# 
GFX11: v_min_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0f,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_min_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x0f,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_min_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x0f,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_min_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x0f,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_min_i32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x11,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_min_i32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x11,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_min_i32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x11,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x11,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x11,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x11,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_i32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x11,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_min_i32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x11,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_min_i32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x11,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_min_i32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00] 
+0x05,0x00,0x11,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x11,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x11,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x11,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_min_i32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x11,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_min_i32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x11,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x13,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_min_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x13,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_min_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x13,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x13,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x13,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x13,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_min_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x13,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_min_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x13,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_min_u32_e64 v5, exec_lo, -1 ; encoding: 
[0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x13,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_min_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x13,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x13,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x13,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x13,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_min_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x13,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_min_u32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x13,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_dx9_zero_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x07,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x07,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x07,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x07,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x07,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x07,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_dx9_zero_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00] 
+0x05,0x00,0x07,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x07,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x07,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x07,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x07,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x07,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x07,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_mul_dx9_zero_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x07,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_mul_dx9_zero_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x07,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x35,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x35,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x35,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x35,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x35,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, 
vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x35,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x35,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x35,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x35,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x35,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x35,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x35,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x35,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_mul_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x35,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_mul_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x35,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x08,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x08,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x08,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00] 
+0x05,0x00,0x08,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x08,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x08,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x08,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x08,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x08,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x08,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x08,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x08,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x08,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_mul_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x08,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_mul_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x08,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_hi_i32_i24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x0a,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x0a,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: 
v_mul_hi_i32_i24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0a,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_hi_i32_i24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0a,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x0a,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0a,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0a,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0a,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x0a,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0a,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_mul_hi_i32_i24_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x0a,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_hi_u32_u24_e64 v5, v1, 
v2 ; encoding: [0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x0c,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x0c,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_hi_u32_u24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0c,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x0c,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0c,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x0c,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0c,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x0c,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00] 
+0x05,0x00,0x0c,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_mul_hi_u32_u24_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x0c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_i32_i24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x09,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x09,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x09,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x09,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x09,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x09,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_i32_i24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x09,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x09,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x09,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x09,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x09,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x09,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 
v5, 0.5, m0 ; encoding: [0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x09,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x09,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_mul_i32_i24_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x09,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_u32_u24_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x0b,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x0b,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x0b,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x0b,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x0b,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x0b,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_mul_u32_u24_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x0b,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x0b,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x0b,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x0b,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00] 
+0x05,0x00,0x0b,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x0b,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x0b,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x0b,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_mul_u32_u24_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x0b,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_or_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1c,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_or_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1c,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_or_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1c,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_or_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1c,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_or_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1c,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_or_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1c,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_or_b32_e64 v5, exec_hi, null ; encoding: 
[0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1c,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1c,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1c,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_or_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1c,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_or_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1c,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# W32: v_sub_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +0x05,0x0c,0x21,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf + +# W32: v_sub_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00] +0x05,0x0c,0x21,0xd5,0xff,0xfb,0x19,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00] +0x05,0x0c,0x21,0xd5,0x69,0xd2,0x18,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00] +0x05,0x0c,0x21,0xd5,0x6a,0x04,0x1a,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 
; encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00] +0x05,0x0c,0x21,0xd5,0x6b,0xfe,0x1b,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00] +0x05,0x0c,0x21,0xd5,0x7b,0xf6,0x18,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00] +0x05,0x0c,0x21,0xd5,0x7d,0xe0,0x19,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00] +0x05,0x0c,0x21,0xd5,0x7e,0xfc,0x18,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00] +0x05,0x0c,0x21,0xd5,0x7f,0x82,0x19,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] +# W64: v_sub_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00] +0x05,0x0c,0x21,0xd5,0x7c,0xfe,0x18,0x00 + +# W32: v_sub_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] +# W64: v_sub_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01] +0x05,0x68,0x21,0xd5,0xc1,0xfa,0xa0,0x01 + +# W32: v_sub_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] +# W64: v_sub_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; 
encoding: [0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01] +0x05,0x6a,0x21,0xd5,0xf0,0xd4,0xa8,0x01 + +# W32: v_sub_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] +# W64: v_sub_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01] +0x05,0x7a,0x21,0xd5,0xfd,0xf8,0xe8,0x01 + +# GFX11: v_sub_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0xfc,0x21,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf + +# GFX11: v_sub_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x33,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x33,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_sub_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x33,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x33,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x33,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x33,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x33,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_sub_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x33,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_sub_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x33,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_sub_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x33,0xd5,0x7f,0xf8,0x00,0x00 + 
+# GFX11: v_sub_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x33,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x33,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_sub_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x33,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_sub_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x33,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_sub_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x33,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x04,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x04,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_sub_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x04,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x04,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x04,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x04,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_sub_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x04,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_sub_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x04,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_sub_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00] 
+0x05,0x00,0x04,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_sub_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x04,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x04,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x04,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_sub_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x04,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_sub_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x04,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_sub_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x04,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_sub_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x26,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x26,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x26,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x26,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x26,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x26,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_sub_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x26,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: 
v_sub_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x26,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x26,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x26,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x26,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x26,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x26,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x26,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_sub_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x26,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# W32: v_subrev_co_ci_u32_e64 v5, s12, v1, 0xaf123456, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], v1, 0xaf123456, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf] +0x05,0x0c,0x22,0xd5,0x01,0xff,0x19,0x00,0x56,0x34,0x12,0xaf + +# W32: v_subrev_co_ci_u32_e64 v5, s12, v255, src_scc, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], v255, src_scc, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00] +0x05,0x0c,0x22,0xd5,0xff,0xfb,0x19,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, s105, s105, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], s105, s105, s[6:7] ; encoding: 
[0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00] +0x05,0x0c,0x22,0xd5,0x69,0xd2,0x18,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, vcc_lo, v2, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_lo, v2, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00] +0x05,0x0c,0x22,0xd5,0x6a,0x04,0x1a,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, vcc_hi, v255, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], vcc_hi, v255, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00] +0x05,0x0c,0x22,0xd5,0x6b,0xfe,0x1b,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, ttmp15, ttmp15, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], ttmp15, ttmp15, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00] +0x05,0x0c,0x22,0xd5,0x7b,0xf6,0x18,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, m0, 0.5, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], m0, 0.5, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00] +0x05,0x0c,0x22,0xd5,0x7d,0xe0,0x19,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, exec_lo, exec_lo, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], exec_lo, exec_lo, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00] +0x05,0x0c,0x22,0xd5,0x7e,0xfc,0x18,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, exec_hi, -1, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], exec_hi, -1, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00] +0x05,0x0c,0x22,0xd5,0x7f,0x82,0x19,0x00 + +# W32: v_subrev_co_ci_u32_e64 v5, s12, null, exec_hi, s6 ; encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] +# W64: v_subrev_co_ci_u32_e64 v5, s[12:13], null, exec_hi, s[6:7] ; encoding: [0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00] +0x05,0x0c,0x22,0xd5,0x7c,0xfe,0x18,0x00 
+ +# W32: v_subrev_co_ci_u32_e64 v5, s104, -1, m0, s104 ; encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] +# W64: v_subrev_co_ci_u32_e64 v5, s[104:105], -1, m0, s[104:105] ; encoding: [0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01] +0x05,0x68,0x22,0xd5,0xc1,0xfa,0xa0,0x01 + +# W32: v_subrev_co_ci_u32_e64 v5, vcc_lo, 0.5, vcc_lo, vcc_lo ; encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] +# W64: v_subrev_co_ci_u32_e64 v5, vcc, 0.5, vcc_lo, vcc ; encoding: [0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01] +0x05,0x6a,0x22,0xd5,0xf0,0xd4,0xa8,0x01 + +# W32: v_subrev_co_ci_u32_e64 v5, ttmp14, src_scc, null, ttmp14 ; encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] +# W64: v_subrev_co_ci_u32_e64 v5, ttmp[14:15], src_scc, null, ttmp[14:15] ; encoding: [0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01] +0x05,0x7a,0x22,0xd5,0xfd,0xf8,0xe8,0x01 + +# GFX11: v_subrev_co_ci_u32_e64 v255, null, 0xaf123456, vcc_hi, null clamp ; encoding: [0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0xff,0xfc,0x22,0xd5,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf + +# GFX11: v_subrev_f16_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x34,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_subrev_f16_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x34,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_subrev_f16_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x34,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x34,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x34,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, vcc_hi, 0xfe0b ; encoding: [0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +0x05,0x00,0x34,0xd5,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, ttmp15, src_scc ; encoding: 
[0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x34,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_subrev_f16_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x34,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_subrev_f16_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x34,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_subrev_f16_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x34,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x34,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x34,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_subrev_f16_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x34,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_subrev_f16_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x34,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_subrev_f16_e64 v255, -|0xfe0b|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00] +0xff,0x83,0x34,0xd5,0xff,0xd6,0x00,0x78,0x0b,0xfe,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x05,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_subrev_f32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x05,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_subrev_f32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x05,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x05,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x05,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: 
v_subrev_f32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x05,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_subrev_f32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x05,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_subrev_f32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x05,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_subrev_f32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x05,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_subrev_f32_e64 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x01,0x05,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x05,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x05,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_subrev_f32_e64 v5, 0.5, -m0 mul:2 ; encoding: [0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48] +0x05,0x00,0x05,0xd5,0xf0,0xfa,0x00,0x48 + +# GFX11: v_subrev_f32_e64 v5, -src_scc, |vcc_lo| mul:4 ; encoding: [0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30] +0x05,0x02,0x05,0xd5,0xfd,0xd4,0x00,0x30 + +# GFX11: v_subrev_f32_e64 v255, -|0xaf123456|, -|vcc_hi| clamp div:2 ; encoding: [0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf] +0xff,0x83,0x05,0xd5,0xff,0xd6,0x00,0x78,0x56,0x34,0x12,0xaf + +# GFX11: v_subrev_nc_u32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x27,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x27,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x27,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, s105, s105 ; 
encoding: [0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x27,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x27,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x27,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_subrev_nc_u32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x27,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x27,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x27,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x27,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x27,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x27,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x27,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x27,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_subrev_nc_u32_e64 v255, 0xaf123456, vcc_hi clamp ; encoding: [0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x80,0x27,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_xnor_b32_e64 v5, v1, v2 ; encoding: [0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1e,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_xnor_b32_e64 v5, v255, v255 ; encoding: 
[0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1e,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_xnor_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1e,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_xnor_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1e,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_xnor_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1e,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_xnor_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1e,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_xnor_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1e,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1e,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1e,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1e,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_xnor_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1e,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_xor_b32_e64 
v5, v1, v2 ; encoding: [0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00] +0x05,0x00,0x1d,0xd5,0x01,0x05,0x02,0x00 + +# GFX11: v_xor_b32_e64 v5, v255, v255 ; encoding: [0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00] +0x05,0x00,0x1d,0xd5,0xff,0xff,0x03,0x00 + +# GFX11: v_xor_b32_e64 v5, s1, s2 ; encoding: [0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x01,0x04,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, s105, s105 ; encoding: [0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x69,0xd2,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x6a,0xf6,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, vcc_hi, 0xaf123456 ; encoding: [0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +0x05,0x00,0x1d,0xd5,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf + +# GFX11: v_xor_b32_e64 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00] +0x05,0x00,0x1d,0xd5,0x7b,0xfa,0x01,0x00 + +# GFX11: v_xor_b32_e64 v5, m0, 0.5 ; encoding: [0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00] +0x05,0x00,0x1d,0xd5,0x7d,0xe0,0x01,0x00 + +# GFX11: v_xor_b32_e64 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00] +0x05,0x00,0x1d,0xd5,0x7e,0x82,0x01,0x00 + +# GFX11: v_xor_b32_e64 v5, exec_hi, null ; encoding: [0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x7f,0xf8,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, null, exec_lo ; encoding: [0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00] +0x05,0x00,0x1d,0xd5,0x7c,0xfc,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, -1, exec_hi ; encoding: [0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00] +0x05,0x00,0x1d,0xd5,0xc1,0xfe,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, 0.5, m0 ; encoding: [0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00] +0x05,0x00,0x1d,0xd5,0xf0,0xfa,0x00,0x00 + +# GFX11: v_xor_b32_e64 v5, src_scc, vcc_lo ; encoding: [0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00] +0x05,0x00,0x1d,0xd5,0xfd,0xd4,0x00,0x00 + +# GFX11: v_xor_b32_e64 v255, 0xaf123456, vcc_hi ; encoding: 
[0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] +0xff,0x00,0x1d,0xd5,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt new file mode 100644 index 0000000000000..87059b9e3c16c --- /dev/null +++ b/llvm/test/MC/Disassembler/PowerPC/ppc-encoding-ISAFuture.txt @@ -0,0 +1,41 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu \ +# RUN: -mcpu=future | FileCheck %s + +# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-aix-gnu \ +# RUN: -mcpu=future | FileCheck %s + +# RUN: llvm-mc --disassemble %s -triple powerpc-unknown-aix-gnu \ +# RUN: -mcpu=future | FileCheck %s + +#CHECK: dmxxextfdmr512 1, 2, 34, 0 +0xf0 0x82 0x17 0x12 + +#CHECK: dmxxextfdmr512 1, 2, 34, 1 +0xf0 0x83 0x17 0x12 + +#CHECK: dmxxextfdmr256 3, 8, 0 +0xf1 0x80 0x47 0x90 + +#CHECK: dmxxextfdmr256 3, 8, 3 +0xf1 0x81 0x4f 0x90 + +#CHECK: dmxxinstfdmr512 1, 2, 34, 0 +0xf0 0x82 0x17 0x52 + +#CHECK: dmxxinstfdmr512 1, 2, 34, 1 +0xf0 0x83 0x17 0x52 + +#CHECK: dmxxinstfdmr256 3, 8, 0 +0xf1 0x80 0x47 0x94 + +#CHECK: dmxxinstfdmr256 3, 8, 3 +0xf1 0x81 0x4f 0x94 + +#CHECK: dmsetdmrz 3 +0x7d 0x82 0x01 0x62 + +#CHECK: dmmr 4, 5 +0x7e 0x06 0xa1 0x62 + +#CHECK: dmxor 6, 7 +0x7f 0x07 0xe1 0x62 diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt new file mode 100644 index 0000000000000..32dfcb28bf57c --- /dev/null +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64le-encoding-ISAFuture.txt @@ -0,0 +1,35 @@ +# RUN: llvm-mc --disassemble %s -triple powerpc64le-unknown-unknown \ +# RUN: -mcpu=future | FileCheck %s + +#CHECK: dmxxextfdmr512 1, 2, 34, 0 +0x12 0x17 0x82 0xf0 + +#CHECK: dmxxextfdmr512 1, 2, 34, 1 +0x12 0x17 0x83 0xf0 + +#CHECK: dmxxextfdmr256 3, 8, 0 +0x90 0x47 0x80 0xf1 + +#CHECK: dmxxextfdmr256 3, 8, 3 +0x90 0x4f 0x81 0xf1 + +#CHECK: dmxxinstfdmr512 1, 2, 34, 0 
+0x52 0x17 0x82 0xf0 + +#CHECK: dmxxinstfdmr512 1, 2, 34, 1 +0x52 0x17 0x83 0xf0 + +#CHECK: dmxxinstfdmr256 3, 8, 0 +0x94 0x47 0x80 0xf1 + +#CHECK: dmxxinstfdmr256 3, 8, 3 +0x94 0x4f 0x81 0xf1 + +#CHECK: dmsetdmrz 3 +0x62 0x01 0x82 0x7d + +#CHECK: dmmr 4, 5 +0x62 0xa1 0x06 0x7e + +#CHECK: dmxor 6, 7 +0x62 0xe1 0x07 0x7f diff --git a/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s new file mode 100644 index 0000000000000..2f7986f221c97 --- /dev/null +++ b/llvm/test/MC/PowerPC/ppc-encoding-ISAFuture.s @@ -0,0 +1,50 @@ +# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-BE %s +# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-LE %s +# RUN: llvm-mc -triple powerpc-unknown-aix-gnu --show-encoding %s | \ +# RUN: FileCheck -check-prefix=CHECK-BE %s + +# CHECK-BE: dmxxextfdmr512 1, 2, 34, 0 # encoding: [0xf0,0x82,0x17,0x12] +# CHECK-LE: dmxxextfdmr512 1, 2, 34, 0 # encoding: [0x12,0x17,0x82,0xf0] + dmxxextfdmr512 1, 2, 34, 0 + +# CHECK-BE: dmxxextfdmr512 1, 2, 34, 1 # encoding: [0xf0,0x83,0x17,0x12] +# CHECK-LE: dmxxextfdmr512 1, 2, 34, 1 # encoding: [0x12,0x17,0x83,0xf0] + dmxxextfdmr512 1, 2, 34, 1 + +# CHECK-BE: dmxxextfdmr256 3, 8, 0 # encoding: [0xf1,0x80,0x47,0x90] +# CHECK-LE: dmxxextfdmr256 3, 8, 0 # encoding: [0x90,0x47,0x80,0xf1] + dmxxextfdmr256 3, 8, 0 + +# CHECK-BE: dmxxextfdmr256 3, 8, 3 # encoding: [0xf1,0x81,0x4f,0x90] +# CHECK-LE: dmxxextfdmr256 3, 8, 3 # encoding: [0x90,0x4f,0x81,0xf1] + dmxxextfdmr256 3, 8, 3 + +# CHECK-BE: dmxxinstfdmr512 1, 2, 34, 0 # encoding: [0xf0,0x82,0x17,0x52] +# CHECK-LE: dmxxinstfdmr512 1, 2, 34, 0 # encoding: [0x52,0x17,0x82,0xf0] + dmxxinstfdmr512 1, 2, 34, 0 + +# CHECK-BE: dmxxinstfdmr512 1, 2, 34, 1 # encoding: [0xf0,0x83,0x17,0x52] +# CHECK-LE: dmxxinstfdmr512 1, 2, 34, 1 # encoding: [0x52,0x17,0x83,0xf0] + dmxxinstfdmr512 1, 2, 34, 1 + +# CHECK-BE: 
dmxxinstfdmr256 3, 8, 0 # encoding: [0xf1,0x80,0x47,0x94] +# CHECK-LE: dmxxinstfdmr256 3, 8, 0 # encoding: [0x94,0x47,0x80,0xf1] + dmxxinstfdmr256 3, 8, 0 + +# CHECK-BE: dmxxinstfdmr256 3, 8, 3 # encoding: [0xf1,0x81,0x4f,0x94] +# CHECK-LE: dmxxinstfdmr256 3, 8, 3 # encoding: [0x94,0x4f,0x81,0xf1] + dmxxinstfdmr256 3, 8, 3 + +# CHECK-BE: dmsetdmrz 3 # encoding: [0x7d,0x82,0x01,0x62] +# CHECK-LE: dmsetdmrz 3 # encoding: [0x62,0x01,0x82,0x7d] + dmsetdmrz 3 + +# CHECK-BE: dmmr 4, 5 # encoding: [0x7e,0x06,0xa1,0x62] +# CHECK-LE: dmmr 4, 5 # encoding: [0x62,0xa1,0x06,0x7e] + dmmr 4, 5 + +# CHECK-BE: dmxor 6, 7 # encoding: [0x7f,0x07,0xe1,0x62] +# CHECK-LE: dmxor 6, 7 # encoding: [0x62,0xe1,0x07,0x7f] + dmxor 6, 7 diff --git a/llvm/test/MC/WebAssembly/array-fill.ll b/llvm/test/MC/WebAssembly/array-fill.ll index 2f4bf83205a4d..4725d4eda065b 100644 --- a/llvm/test/MC/WebAssembly/array-fill.ll +++ b/llvm/test/MC/WebAssembly/array-fill.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s ; PR33624 source_filename = "ws.c" diff --git a/llvm/test/MC/WebAssembly/assembler-binary.ll b/llvm/test/MC/WebAssembly/assembler-binary.ll index c3d6bd588d24a..815d347047974 100644 --- a/llvm/test/MC/WebAssembly/assembler-binary.ll +++ b/llvm/test/MC/WebAssembly/assembler-binary.ll @@ -1,7 +1,7 @@ -; RUN: llc -filetype=asm -asm-verbose=false %s -o %t.s +; RUN: llc -mcpu=mvp -filetype=asm -asm-verbose=false %s -o %t.s ; RUN: FileCheck -check-prefix=ASM -input-file %t.s %s ; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=asm %t.s -o - | FileCheck -check-prefix=ASM %s -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s ; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj %t.s -o - | obj2yaml | FileCheck %s ; This specifically tests that we can generate a binary from the assembler diff --git 
a/llvm/test/MC/WebAssembly/bss.ll b/llvm/test/MC/WebAssembly/bss.ll index 05b6a6986e119..d1d9f5622def7 100644 --- a/llvm/test/MC/WebAssembly/bss.ll +++ b/llvm/test/MC/WebAssembly/bss.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/MC/WebAssembly/comdat.ll b/llvm/test/MC/WebAssembly/comdat.ll index 6f8c1403be749..0886301597471 100644 --- a/llvm/test/MC/WebAssembly/comdat.ll +++ b/llvm/test/MC/WebAssembly/comdat.ll @@ -1,6 +1,6 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s -; RUN: llc -filetype=asm %s -asm-verbose=false -o - | FileCheck --check-prefix=ASM %s -; RUN: llc -filetype=asm %s -o - | llvm-mc -triple=wasm32 -filetype=obj -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=asm %s -asm-verbose=false -o - | FileCheck --check-prefix=ASM %s +; RUN: llc -mcpu=mvp -filetype=asm %s -o - | llvm-mc -triple=wasm32 -filetype=obj -o - | obj2yaml | FileCheck %s ; These RUN lines verify the ll direct-to-object path, the ll->asm path, and the ; object output via asm. 
diff --git a/llvm/test/MC/WebAssembly/debug-info.ll b/llvm/test/MC/WebAssembly/debug-info.ll index d2a815f097164..a60557b65e3ba 100644 --- a/llvm/test/MC/WebAssembly/debug-info.ll +++ b/llvm/test/MC/WebAssembly/debug-info.ll @@ -1,4 +1,4 @@ -; RUN: llc -generate-arange-section -filetype=obj %s -o - | llvm-readobj -S -r --symbols - | FileCheck %s +; RUN: llc -mcpu=mvp -generate-arange-section -filetype=obj %s -o - | llvm-readobj -S -r --symbols - | FileCheck %s ; CHECK: Format: WASM ; CHECK-NEXT:Arch: wasm32 @@ -279,7 +279,7 @@ entry: ret void, !dbg !18 } -attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="mvp" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!13, !14, !15} diff --git a/llvm/test/MC/WebAssembly/debug-info64.ll b/llvm/test/MC/WebAssembly/debug-info64.ll index 47b33aac104c6..da204b9753a50 100644 --- a/llvm/test/MC/WebAssembly/debug-info64.ll +++ b/llvm/test/MC/WebAssembly/debug-info64.ll @@ -1,4 +1,4 @@ -; RUN: llc -generate-arange-section -filetype=obj %s -o - | llvm-readobj -S -r --symbols - | FileCheck %s +; RUN: llc -mcpu=mvp -generate-arange-section -filetype=obj %s -o - | llvm-readobj -S -r --symbols - | FileCheck %s ; CHECK: Format: WASM ; CHECK-NEXT: Arch: wasm64 @@ -285,7 +285,7 @@ 
entry: ret void, !dbg !18 } -attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="mvp" "unsafe-fp-math"="false" "use-soft-float"="false" } !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!13, !14, !15} diff --git a/llvm/test/MC/WebAssembly/explicit-sections.ll b/llvm/test/MC/WebAssembly/explicit-sections.ll index ae04051f458e0..a65172b22d467 100644 --- a/llvm/test/MC/WebAssembly/explicit-sections.ll +++ b/llvm/test/MC/WebAssembly/explicit-sections.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/MC/WebAssembly/global-ctor-dtor.ll b/llvm/test/MC/WebAssembly/global-ctor-dtor.ll index df1252ef9ce15..97b40e8a5d4bb 100644 --- a/llvm/test/MC/WebAssembly/global-ctor-dtor.ll +++ b/llvm/test/MC/WebAssembly/global-ctor-dtor.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/MC/WebAssembly/tag-section-decoding.ll b/llvm/test/MC/WebAssembly/tag-section-decoding.ll index 605cae8e9f147..d872047f27414 100644 --- 
a/llvm/test/MC/WebAssembly/tag-section-decoding.ll +++ b/llvm/test/MC/WebAssembly/tag-section-decoding.ll @@ -339,4 +339,4 @@ define i32 @test_throw(i8* %p) { ; number with which its LEB128 and ULEB128 encodings are different, because its ; 7th least significant bit is not 0. ; CHECK: - Type: TAG -; CHEC-NEXT: TagTypes: [ 64 ] +; CHECK-NEXT: TagTypes: [ 64 ] diff --git a/llvm/test/MC/WebAssembly/type-checker-emit-after-unreachable.s b/llvm/test/MC/WebAssembly/type-checker-emit-after-unreachable.s index 4c88384616f55..806901d01dce7 100644 --- a/llvm/test/MC/WebAssembly/type-checker-emit-after-unreachable.s +++ b/llvm/test/MC/WebAssembly/type-checker-emit-after-unreachable.s @@ -16,12 +16,12 @@ foo: end_function # CHECK-LABEL: foo: -# CHEKC-NEXT: .functype foo () -> (i32) -# CHEKC-NEXT: i32.const 1 -# CHEKC-NEXT: if i32 -# CHEKC-NEXT: i32.const 2 -# CHEKC-NEXT: return -# CHEKC-NEXT: else -# CHEKC-NEXT: i32.const 3 -# CHEKC-NEXT: end_if -# CHEKC-NEXT: end_function +# CHECK-NEXT: .functype foo () -> (i32) +# CHECK-NEXT: i32.const 1 +# CHECK-NEXT: if i32 +# CHECK-NEXT: i32.const 2 +# CHECK-NEXT: return +# CHECK-NEXT: else +# CHECK-NEXT: i32.const 3 +# CHECK-NEXT: end_if +# CHECK-NEXT: end_function diff --git a/llvm/test/MC/WebAssembly/unnamed-data.ll b/llvm/test/MC/WebAssembly/unnamed-data.ll index 1fe6df2f77afc..398d53c826374 100644 --- a/llvm/test/MC/WebAssembly/unnamed-data.ll +++ b/llvm/test/MC/WebAssembly/unnamed-data.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/MC/WebAssembly/visibility.ll b/llvm/test/MC/WebAssembly/visibility.ll index 5bb757b28f18a..69b273ecbf25e 100644 --- a/llvm/test/MC/WebAssembly/visibility.ll +++ b/llvm/test/MC/WebAssembly/visibility.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -mcpu=mvp -filetype=obj %s -o - | obj2yaml | FileCheck 
%s target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/Other/attribute-comment.ll b/llvm/test/Other/attribute-comment.ll index eab96e8aa9cd5..cf4076eec7c82 100644 --- a/llvm/test/Other/attribute-comment.ll +++ b/llvm/test/Other/attribute-comment.ll @@ -1,6 +1,6 @@ ; RUN: opt -S < %s | FileCheck %s -strict-whitespace -; CHECK: {{^}}; Function Attrs: nounwind readnone ssp uwtable{{$}} +; CHECK: {{^}}; Function Attrs: nounwind ssp memory(none) uwtable{{$}} ; CHECK-NEXT: define void @test1() #0 define void @test1() #0 { ret void diff --git a/llvm/test/Other/cgscc-devirt-iteration.ll b/llvm/test/Other/cgscc-devirt-iteration.ll index 70f6c1f508deb..93056e962bd82 100644 --- a/llvm/test/Other/cgscc-devirt-iteration.ll +++ b/llvm/test/Other/cgscc-devirt-iteration.ll @@ -15,7 +15,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes='default' -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=AFTER --check-prefix=AFTER2 declare void @readnone() readnone -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-NEXT: declare void @readnone() declare void @unknown() @@ -28,7 +28,7 @@ declare void @unknown() define void @test1() { ; BEFORE-NOT: Function Attrs -; AFTER: Function Attrs: nofree nosync readnone +; AFTER: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: define void @test1() entry: %fptr = alloca void ()* @@ -51,13 +51,13 @@ entry: ; devirtualize again, and then deduce readnone. 
declare void @readnone_with_arg(void ()**) readnone -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: declare void @readnone_with_arg(void ()**) define void @test2_a(void ()** %ignore) { ; BEFORE-NOT: Function Attrs -; AFTER1: Function Attrs: nofree readonly -; AFTER2: Function Attrs: nofree nosync readnone +; AFTER1: Function Attrs: nofree memory(read) +; AFTER2: Function Attrs: nofree nosync memory(none) ; BEFORE: define void @test2_a(void ()** %ignore) ; AFTER: define void @test2_a(void ()** readnone %ignore) entry: @@ -77,8 +77,8 @@ entry: define void @test2_b() { ; BEFORE-NOT: Function Attrs -; AFTER1: Function Attrs: nofree readonly -; AFTER2: Function Attrs: nofree nosync readnone +; AFTER1: Function Attrs: nofree memory(read) +; AFTER2: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: define void @test2_b() entry: %f2ptr = alloca void ()* diff --git a/llvm/test/Other/cgscc-iterate-function-mutation.ll b/llvm/test/Other/cgscc-iterate-function-mutation.ll index f4e4b889f50d6..50e0c748076eb 100644 --- a/llvm/test/Other/cgscc-iterate-function-mutation.ll +++ b/llvm/test/Other/cgscc-iterate-function-mutation.ll @@ -338,4 +338,4 @@ exit: ret void } -; CHECK: attributes #0 = { nofree nosync readnone } +; CHECK: attributes #0 = { nofree nosync memory(none) } diff --git a/llvm/test/Other/invariant.group.ll b/llvm/test/Other/invariant.group.ll index c757aff25024e..94b1bc7cd958e 100644 --- a/llvm/test/Other/invariant.group.ll +++ b/llvm/test/Other/invariant.group.ll @@ -91,11 +91,11 @@ declare void @use(i8* readonly) declare void @useBool(i1) declare void @clobber(i8*) -; CHECK: Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn{{$}} +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite){{$}} ; CHECK-NEXT: declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* 
@llvm.launder.invariant.group.p0i8(i8*) -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn{{$}} +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none){{$}} ; CHECK-NEXT: declare i8* @llvm.strip.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git a/llvm/test/Other/new-pass-manager.ll b/llvm/test/Other/new-pass-manager.ll index f9cea34e22226..5be67a34a9174 100644 --- a/llvm/test/Other/new-pass-manager.ll +++ b/llvm/test/Other/new-pass-manager.ll @@ -8,6 +8,9 @@ ; RUN: opt -disable-output -disable-verify -verify-cfg-preserved=1 -debug-pass-manager \ ; RUN: -passes=no-op-module %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-MODULE-PASS +; RUN: opt -disable-output -disable-verify -verify-cfg-preserved=1 -debug-pass-manager \ +; RUN: -p no-op-module %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-MODULE-PASS ; CHECK-MODULE-PASS: Running pass: NoOpModulePass ; RUN: opt -disable-output -disable-verify -verify-cfg-preserved=1 -debug-pass-manager \ diff --git a/llvm/test/Other/opt-override-mcpu-mattr.ll b/llvm/test/Other/opt-override-mcpu-mattr.ll index bb95bd4ce54b1..19dee8bbad346 100644 --- a/llvm/test/Other/opt-override-mcpu-mattr.ll +++ b/llvm/test/Other/opt-override-mcpu-mattr.ll @@ -4,8 +4,8 @@ ; target-cpu and target-features using command line options -mcpu and ; -mattr. 
-; CHECK: attributes #0 = { nounwind readnone ssp uwtable "target-cpu"="broadwell" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3,+avx2" "use-soft-float"="false" } -; CHECK: attributes #1 = { nounwind readnone ssp uwtable "target-cpu"="core2" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3,+avx2" "use-soft-float"="false" } +; CHECK: attributes #0 = { nounwind ssp memory(none) uwtable "target-cpu"="broadwell" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3,+avx2" "use-soft-float"="false" } +; CHECK: attributes #1 = { nounwind ssp memory(none) uwtable "target-cpu"="core2" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3,+avx2" "use-soft-float"="false" } define i32 @no_target_cpu() #0 { entry: diff --git a/llvm/test/Other/print-module-scope.ll b/llvm/test/Other/print-module-scope.ll index 4a0525dbb380b..e2dc4e965b38d 100644 --- a/llvm/test/Other/print-module-scope.ll +++ b/llvm/test/Other/print-module-scope.ll @@ -30,7 +30,7 @@ ; FOO: define void @foo ; FOO: Function Attrs: nounwind ; FOO: define void @bar -; FOO: Function Attrs: nounwind readnone ssp +; FOO: Function Attrs: nounwind ssp memory(none) ; FOO: declare void @baz define void @foo() nounwind ssp { @@ -49,6 +49,6 @@ attributes #0 = { nounwind "frame-pointer"="all" } attributes #1 = { nounwind readnone ssp "use-soft-float"="false" } ; FOO: attributes #{{[0-9]}} = { nounwind "frame-pointer"="all" } -; FOO: attributes #{{[0-9]}} = { nounwind readnone ssp "use-soft-float"="false" } +; FOO: attributes #{{[0-9]}} = { nounwind ssp memory(none) "use-soft-float"="false" } ; FOO-NOT: IR Dump After {{Simplify the CFG|SimplifyCFGPass}} diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll index 438900c72d041..1176187969e2c 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/lower_debug_info.ll @@ -4,6 +4,11 @@ ; This test checks that debug info is preserved during lowering ; ESIMD specific constructs. 
+; Disable test until GenXIntrinsics is updated to reflect recent community +; changes; +; XFAIL:* + + @__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 declare spir_func <16 x float> @_Z26__esimd_oword_ld_unalignedIfLi16EjLi0EEN2cl4sycl3ext5intel12experimental5esimd6detail11vector_typeIT_XT0_EE4typeET1_j(i32, i32) diff --git a/llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll b/llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll index 1dc43d2696812..d6141245c4ec1 100644 --- a/llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll +++ b/llvm/test/SYCLLowerIR/ESIMD/lower_intrins.ll @@ -9,6 +9,9 @@ ; not practical in this case. ; ; All new test cases should be added to intrins_trans.cpp +; Disable test until GenXIntrinsics is updated to reflect recent community +; changes; +; XFAIL:* target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" target triple = "spir64-unknown-unknown" diff --git a/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll b/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll index 92b5182315943..f5b3130fd1520 100644 --- a/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll +++ b/llvm/test/ThinLTO/X86/Inputs/linkonce_resolution_comdat.ll @@ -1,13 +1,24 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -$c2 = comdat any +$f = comdat any +$g = comdat any -define linkonce_odr i32 @f(i8*) unnamed_addr comdat($c2) { +@g_private = private global i32 41, comdat($g) + +define linkonce_odr i32 @f(i8*) unnamed_addr comdat($f) { + ret i32 41 +} + +define linkonce_odr i32 @g() unnamed_addr comdat($g) { ret i32 41 } -define i32 @g() { +define internal void @g_internal() unnamed_addr comdat($g) { + ret void +} + +define i32 @h() { %i = call i32 @f(i8* null) ret i32 %i } diff --git a/llvm/test/ThinLTO/X86/constructor-alias.ll 
b/llvm/test/ThinLTO/X86/constructor-alias.ll new file mode 100644 index 0000000000000..3b8db6eb1c81d --- /dev/null +++ b/llvm/test/ThinLTO/X86/constructor-alias.ll @@ -0,0 +1,44 @@ +;; The constructor alias example is reduced from +;; +;; template +;; struct A { A() {} virtual ~A() {} }; +;; template struct A; +;; void *foo() { return new A; } +;; +;; clang -c -fpic -O1 -flto=thin a.cc && cp a.o b.o && ld.lld -shared a.o b.so + +; RUN: opt -opaque-pointers -module-summary %s -o %t1.bc +; RUN: cp %t1.bc %t2.bc +; RUN: llvm-lto2 run -opaque-pointers %t1.bc %t2.bc -r=%t1.bc,_ZTV1A,pl -r=%t1.bc,_ZN1AD0Ev,pl -r=%t1.bc,_ZN1AD1Ev,pl -r=%t1.bc,_ZN1AD2Ev,pl -r=%t1.bc,D1_a,pl -r=%t1.bc,D1_a_a,pl \ +; RUN: -r=%t2.bc,_ZTV1A,l -r=%t2.bc,_ZN1AD0Ev,l -r=%t2.bc,_ZN1AD1Ev,l -r=%t2.bc,_ZN1AD2Ev,l -r=%t2.bc,D1_a,l -r=%t2.bc,D1_a_a,l -o %t3 --save-temps +; RUN: llvm-dis -opaque-pointers < %t3.2.1.promote.bc | FileCheck %s + +; CHECK: @_ZTV1A = available_externally dso_local unnamed_addr constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN1AD1Ev, ptr @_ZN1AD0Ev] } +; CHECK: @D1_a = available_externally dso_local unnamed_addr alias void (ptr), ptr @_ZN1AD1Ev +; CHECK: @_ZN1AD1Ev = available_externally dso_local unnamed_addr alias void (ptr), ptr @_ZN1AD2Ev +; CHECK: @D1_a_a = available_externally dso_local unnamed_addr alias void (ptr), ptr @D1_a +; CHECK: define available_externally dso_local void @_ZN1AD2Ev(ptr noundef nonnull %0) unnamed_addr { +; CHECK: define available_externally dso_local void @_ZN1AD0Ev(ptr noundef nonnull %0) unnamed_addr { + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$_ZN1AD5Ev = comdat any +$_ZTV1A = comdat any + +@_ZTV1A = weak_odr unnamed_addr constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN1AD1Ev, ptr @_ZN1AD0Ev] }, comdat + +@D1_a = weak_odr unnamed_addr alias void (ptr), ptr @_ZN1AD1Ev +@_ZN1AD1Ev = weak_odr unnamed_addr alias void 
(ptr), ptr @_ZN1AD2Ev +@D1_a_a = weak_odr unnamed_addr alias void (ptr), ptr @D1_a + +define weak_odr void @_ZN1AD2Ev(ptr noundef nonnull %0) unnamed_addr comdat($_ZN1AD5Ev) { + ret void +} + +define weak_odr void @_ZN1AD0Ev(ptr noundef nonnull %0) unnamed_addr comdat($_ZN1AD5Ev) { + call void @D1_a(ptr noundef nonnull %0) + call void @D1_a_a(ptr noundef nonnull %0) + call void @_ZN1AD1Ev(ptr noundef nonnull %0) + ret void +} diff --git a/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll b/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll index 7b22180132e6a..2fb226046ea9f 100644 --- a/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll +++ b/llvm/test/ThinLTO/X86/linkonce_resolution_comdat.ll @@ -1,33 +1,54 @@ -; This test ensures that we drop the preempted copy of @f from %t2.bc from its -; comdat after making it available_externally. If not we would get a -; verification error. +; This test ensures that we drop the preempted copy of @f/@g from %t2.bc from their +; comdats after making it available_externally. If not we would get a +; verification error. g_internal/g_private are changed to available_externally +; as well since it is in the same comdat of g. ; RUN: opt -module-summary %s -o %t1.bc ; RUN: opt -module-summary %p/Inputs/linkonce_resolution_comdat.ll -o %t2.bc -; RUN: llvm-lto -thinlto-action=run -disable-thinlto-funcattrs=0 %t1.bc %t2.bc -exported-symbol=f -exported-symbol=g -thinlto-save-temps=%t3. +; RUN: llvm-lto -thinlto-action=run -disable-thinlto-funcattrs=0 %t1.bc %t2.bc -exported-symbol=f -exported-symbol=g -exported-symbol=h -thinlto-save-temps=%t3. ; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s --check-prefix=IMPORT1 ; RUN: llvm-dis %t3.1.3.imported.bc -o - | FileCheck %s --check-prefix=IMPORT2 ; Copy from first module is prevailing and converted to weak_odr, copy ; from second module is preempted and converted to available_externally and ; removed from comdat. 
-; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] comdat($c1) { +; IMPORT1: @g_private = private global i32 43, comdat($g) +; IMPORT1: define weak_odr i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] comdat { +; IMPORT1: define weak_odr i32 @g() unnamed_addr [[ATTR]] comdat { +; IMPORT1: define internal void @g_internal() unnamed_addr comdat($g) { + +; IMPORT2: @g_private = available_externally dso_local global i32 41{{$}} ; IMPORT2: define available_externally i32 @f(i8* %0) unnamed_addr [[ATTR:#[0-9]+]] { +; IMPORT2: define available_externally i32 @g() unnamed_addr [[ATTR]] { +; IMPORT2: define available_externally dso_local void @g_internal() unnamed_addr { ; CHECK-DAG: attributes [[ATTR]] = { norecurse nounwind } -; RUN: llvm-nm -o - < %t1.bc.thinlto.o | FileCheck %s --check-prefix=NM1 +; RUN: llvm-nm %t1.bc.thinlto.o | FileCheck %s --check-prefix=NM1 ; NM1: W f +; NM1: W g -; RUN: llvm-nm -o - < %t2.bc.thinlto.o | FileCheck %s --check-prefix=NM2 +; RUN: llvm-nm %t2.bc.thinlto.o | FileCheck %s --check-prefix=NM2 ; f() would have been turned into available_externally since it is preempted, -; and inlined into g() +; and inlined into h() ; NM2-NOT: f +; NM2-NOT: g target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -$c1 = comdat any +$f = comdat any +$g = comdat any + +@g_private = private global i32 43, comdat($g) -define linkonce_odr i32 @f(i8*) unnamed_addr comdat($c1) { +define linkonce_odr i32 @f(i8*) unnamed_addr comdat { ret i32 43 } + +define linkonce_odr i32 @g() unnamed_addr comdat { + ret i32 43 +} + +define internal void @g_internal() unnamed_addr comdat($g) { + ret void +} diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll index 822af07c7fada..3815d682831d5 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ 
b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -142,31 +142,26 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) { } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_alias( -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 -; LE-NEXT: store i8 10, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_alias( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: store i8 10, ptr [[P]], align 1 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_alias( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: 
[[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -193,31 +188,26 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) { } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_alias_BE( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; LE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; LE-NEXT: store i8 10, ptr [[P]], align 1 -; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; LE-NEXT: ret i32 [[O3]] -; -; BE-LABEL: @loadCombine_4consecutive_alias_BE( -; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 -; BE-NEXT: store i8 10, ptr [[P]], align 1 -; BE-NEXT: ret i32 [[L1]] +; ALL-LABEL: @loadCombine_4consecutive_alias_BE( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], 
align 1 +; ALL-NEXT: store i8 10, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1770,32 +1760,26 @@ define i16 @loadCombine_2consecutive_badinsert(ptr %p) { } define i32 @loadCombine_4consecutive_badinsert(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_badinsert( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_badinsert( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: store i8 0, ptr [[P1]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: 
[[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_badinsert( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 0, ptr [[P1]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1820,3 +1804,49 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { %o3 = or i32 %o2, %s4 ret i32 %o3 } + +define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_badinsert2( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 0, ptr [[P3]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 
[[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l2 = load i8, ptr %p1 + store i8 0, ptr %p3, align 1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + %l1 = load i8, ptr %p + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll index b043bb75313c5..c8852376d1cac 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -150,31 +150,26 @@ define i32 @loadCombine_4consecutive_BE(ptr %p) { } define i32 @loadCombine_4consecutive_alias(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_alias( -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 -; LE-NEXT: store i8 10, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_alias( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: store i8 10, ptr [[P]], align 1 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: 
[[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_alias( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -201,31 +196,26 @@ define i32 @loadCombine_4consecutive_alias(ptr %p) { } define i32 @loadCombine_4consecutive_alias_BE(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_alias_BE( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; LE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; LE-NEXT: [[L1:%.*]] = load 
i8, ptr [[P]], align 1 -; LE-NEXT: store i8 10, ptr [[P]], align 1 -; LE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; LE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; LE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; LE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; LE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; LE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; LE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; LE-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 -; LE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 -; LE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 -; LE-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] -; LE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; LE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] -; LE-NEXT: ret i32 [[O3]] -; -; BE-LABEL: @loadCombine_4consecutive_alias_BE( -; BE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1 -; BE-NEXT: store i8 10, ptr [[P]], align 1 -; BE-NEXT: ret i32 [[L1]] +; ALL-LABEL: @loadCombine_4consecutive_alias_BE( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: store i8 10, ptr [[P]], align 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S1:%.*]] = shl i32 [[E1]], 24 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 16 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 8 +; ALL-NEXT: [[O1:%.*]] = or i32 [[S1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[E4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1861,22 +1851,16 @@ 
define i32 @loadCombine_4consecutive_lower_index_comes_before(ptr %p) { } define i16 @loadCombine_2consecutive_badinsert(ptr %p) { -; LE-LABEL: @loadCombine_2consecutive_badinsert( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 -; LE-NEXT: [[L1:%.*]] = load i16, ptr [[P]], align 1 -; LE-NEXT: ret i16 [[L1]] -; -; BE-LABEL: @loadCombine_2consecutive_badinsert( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: store i8 0, ptr [[P1]], align 1 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 -; BE-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 -; BE-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] -; BE-NEXT: ret i16 [[O1]] +; ALL-LABEL: @loadCombine_2consecutive_badinsert( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 0, ptr [[P1]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i16 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i16 +; ALL-NEXT: [[S2:%.*]] = shl i16 [[E2]], 8 +; ALL-NEXT: [[O1:%.*]] = or i16 [[E1]], [[S2]] +; ALL-NEXT: ret i16 [[O1]] ; %p1 = getelementptr i8, ptr %p, i32 1 %l2 = load i8, ptr %p1 @@ -1890,32 +1874,26 @@ define i16 @loadCombine_2consecutive_badinsert(ptr %p) { } define i32 @loadCombine_4consecutive_badinsert(ptr %p) { -; LE-LABEL: @loadCombine_4consecutive_badinsert( -; LE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; LE-NEXT: store i8 0, ptr [[P1]], align 1 -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P]], align 1 -; LE-NEXT: ret i32 [[L1]] -; -; BE-LABEL: @loadCombine_4consecutive_badinsert( -; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 -; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 -; BE-NEXT: [[P3:%.*]] = getelementptr i8, 
ptr [[P]], i32 3 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 -; BE-NEXT: store i8 0, ptr [[P1]], align 1 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 -; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 -; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 -; BE-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 -; BE-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 -; BE-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 -; BE-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 -; BE-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] -; BE-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] -; BE-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] -; BE-NEXT: ret i32 [[O3]] +; ALL-LABEL: @loadCombine_4consecutive_badinsert( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: store i8 0, ptr [[P1]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] ; %p1 = getelementptr i8, ptr %p, i32 1 %p2 = getelementptr i8, ptr %p, i32 2 @@ -1940,3 +1918,49 @@ define i32 @loadCombine_4consecutive_badinsert(ptr %p) { %o3 = or i32 %o2, %s4 ret i32 %o3 } + +define i32 
@loadCombine_4consecutive_badinsert2(ptr %p) { +; ALL-LABEL: @loadCombine_4consecutive_badinsert2( +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; ALL-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; ALL-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; ALL-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1 +; ALL-NEXT: store i8 0, ptr [[P3]], align 1 +; ALL-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1 +; ALL-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1 +; ALL-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1 +; ALL-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 +; ALL-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 +; ALL-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 +; ALL-NEXT: [[E4:%.*]] = zext i8 [[L4]] to i32 +; ALL-NEXT: [[S2:%.*]] = shl i32 [[E2]], 8 +; ALL-NEXT: [[S3:%.*]] = shl i32 [[E3]], 16 +; ALL-NEXT: [[S4:%.*]] = shl i32 [[E4]], 24 +; ALL-NEXT: [[O1:%.*]] = or i32 [[E1]], [[S2]] +; ALL-NEXT: [[O2:%.*]] = or i32 [[O1]], [[S3]] +; ALL-NEXT: [[O3:%.*]] = or i32 [[O2]], [[S4]] +; ALL-NEXT: ret i32 [[O3]] +; + %p1 = getelementptr i8, ptr %p, i32 1 + %p2 = getelementptr i8, ptr %p, i32 2 + %p3 = getelementptr i8, ptr %p, i32 3 + %l2 = load i8, ptr %p1 + store i8 0, ptr %p3, align 1 + %l3 = load i8, ptr %p2 + %l4 = load i8, ptr %p3 + %l1 = load i8, ptr %p + + %e1 = zext i8 %l1 to i32 + %e2 = zext i8 %l2 to i32 + %e3 = zext i8 %l3 to i32 + %e4 = zext i8 %l4 to i32 + + %s2 = shl i32 %e2, 8 + %s3 = shl i32 %e3, 16 + %s4 = shl i32 %e4, 24 + + %o1 = or i32 %e1, %s2 + %o2 = or i32 %o1, %s3 + %o3 = or i32 %o2, %s4 + ret i32 %o3 +} diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll new file mode 100644 index 0000000000000..243927c45f89d --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll @@ -0,0 +1,347 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -atomic-expand %s | FileCheck -check-prefix=GFX908 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -atomic-expand %s | FileCheck -check-prefix=GFX90A %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -atomic-expand %s | FileCheck -check-prefix=GFX940 %s +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -atomic-expand %s | FileCheck -check-prefix=GFX1100 %s + +define float @syncscope_system(float* %addr, float %val) #0 { +; GFX908-LABEL: @syncscope_system( +; GFX908-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX908: atomicrmw.start: +; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX908-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX908: atomicrmw.end: +; GFX908-NEXT: ret float [[TMP6]] +; +; GFX90A-LABEL: @syncscope_system( +; GFX90A-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX90A: atomicrmw.start: +; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX90A-NEXT: [[TMP3:%.*]] = 
bitcast float [[NEW]] to i32 +; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX90A: atomicrmw.end: +; GFX90A-NEXT: ret float [[TMP6]] +; +; GFX940-LABEL: @syncscope_system( +; GFX940-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX940: atomicrmw.start: +; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX940-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX940: atomicrmw.end: +; GFX940-NEXT: ret float [[TMP6]] +; +; GFX1100-LABEL: @syncscope_system( +; GFX1100-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX1100-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX1100: atomicrmw.start: +; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX1100-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX1100-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* 
+; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX1100-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX1100-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX1100-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX1100-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX1100: atomicrmw.end: +; GFX1100-NEXT: ret float [[TMP6]] +; +; GFX11-LABEL: @syncscope_system( +; GFX11-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX11: atomicrmw.start: +; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX11-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] seq_cst seq_cst, align 4 +; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX11: atomicrmw.end: +; GFX11-NEXT: ret float [[TMP6]] + %res = atomicrmw fadd float* %addr, float %val seq_cst + ret float %res +} + +define float @syncscope_workgroup_rtn(float* %addr, float %val) #0 { +; GFX908-LABEL: @syncscope_workgroup_rtn( +; GFX908-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX908: atomicrmw.start: +; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] 
], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX908-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX908: atomicrmw.end: +; GFX908-NEXT: ret float [[TMP6]] +; +; GFX90A-LABEL: @syncscope_workgroup_rtn( +; GFX90A-NEXT: [[TMP1:%.*]] = bitcast float* [[ADDR:%.*]] to i8* +; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] +; GFX90A: atomicrmw.check.shared: +; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] +; GFX90A: atomicrmw.shared: +; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(3)* +; GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd float addrspace(3)* [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX90A: atomicrmw.check.private: +; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX90A: atomicrmw.private: +; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(5)* +; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float 
[[VAL_NEW]], float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.global: +; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(1)* +; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd float addrspace(1)* [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.phi: +; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ] +; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]] +; GFX90A: atomicrmw.end: +; GFX90A-NEXT: ret float [[LOADED_PHI]] +; +; GFX940-LABEL: @syncscope_workgroup_rtn( +; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX940-NEXT: ret float [[RES]] +; +; GFX1100-LABEL: @syncscope_workgroup_rtn( +; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX1100-NEXT: ret float [[RES]] +; +; GFX11-LABEL: @syncscope_workgroup_rtn( +; GFX11-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX11: atomicrmw.start: +; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX11-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX11-NEXT: br i1 
[[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX11: atomicrmw.end: +; GFX11-NEXT: ret float [[TMP6]] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret float %res +} + +define void @syncscope_workgroup_nortn(float* %addr, float %val) #0 { +; GFX908-LABEL: @syncscope_workgroup_nortn( +; GFX908-NEXT: [[TMP1:%.*]] = bitcast float* [[ADDR:%.*]] to i8* +; GFX908-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] +; GFX908: atomicrmw.check.shared: +; GFX908-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP1]]) +; GFX908-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] +; GFX908: atomicrmw.shared: +; GFX908-NEXT: [[TMP2:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(3)* +; GFX908-NEXT: [[TMP3:%.*]] = atomicrmw fadd float addrspace(3)* [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX908-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX908: atomicrmw.check.private: +; GFX908-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[TMP1]]) +; GFX908-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX908: atomicrmw.private: +; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(5)* +; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, float addrspace(5)* [[TMP4]], align 4 +; GFX908-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX908-NEXT: store float [[VAL_NEW]], float addrspace(5)* [[TMP4]], align 4 +; GFX908-NEXT: br label [[ATOMICRMW_PHI]] +; GFX908: atomicrmw.global: +; GFX908-NEXT: [[TMP5:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(1)* +; GFX908-NEXT: [[TMP6:%.*]] = atomicrmw fadd float addrspace(1)* [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4 +; GFX908-NEXT: br label [[ATOMICRMW_PHI]] +; GFX908: atomicrmw.phi: +; GFX908-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], 
[ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ] +; GFX908-NEXT: br label [[ATOMICRMW_END:%.*]] +; GFX908: atomicrmw.end: +; GFX908-NEXT: ret void +; +; GFX90A-LABEL: @syncscope_workgroup_nortn( +; GFX90A-NEXT: [[TMP1:%.*]] = bitcast float* [[ADDR:%.*]] to i8* +; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] +; GFX90A: atomicrmw.check.shared: +; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] +; GFX90A: atomicrmw.shared: +; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(3)* +; GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd float addrspace(3)* [[TMP2]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX90A: atomicrmw.check.private: +; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX90A: atomicrmw.private: +; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(5)* +; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]] +; GFX90A-NEXT: store float [[VAL_NEW]], float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.global: +; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast float* [[ADDR]] to float addrspace(1)* +; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd float addrspace(1)* [[TMP5]], float [[VAL]] syncscope("workgroup") seq_cst, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.phi: +; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ] +; GFX90A-NEXT: br label 
[[ATOMICRMW_END:%.*]] +; GFX90A: atomicrmw.end: +; GFX90A-NEXT: ret void +; +; GFX940-LABEL: @syncscope_workgroup_nortn( +; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX940-NEXT: ret void +; +; GFX1100-LABEL: @syncscope_workgroup_nortn( +; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[ADDR:%.*]], float [[VAL:%.*]] syncscope("workgroup") seq_cst, align 4 +; GFX1100-NEXT: ret void +; +; GFX11-LABEL: @syncscope_workgroup_nortn( +; GFX11-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX11: atomicrmw.start: +; GFX11-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX11-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX11: atomicrmw.end: +; GFX11-NEXT: ret void + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret void +} + +define float @no_unsafe(float* %addr, float %val) { +; GFX908-LABEL: @no_unsafe( +; GFX908-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX908: atomicrmw.start: +; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; 
GFX908-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX908-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX908-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX908-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX908: atomicrmw.end: +; GFX908-NEXT: ret float [[TMP6]] +; +; GFX90A-LABEL: @no_unsafe( +; GFX90A-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX90A: atomicrmw.start: +; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX90A: atomicrmw.end: +; GFX90A-NEXT: ret float [[TMP6]] +; +; GFX940-LABEL: @no_unsafe( +; GFX940-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX940-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX940: atomicrmw.start: +; GFX940-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; 
GFX940-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX940-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX940-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX940-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX940-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX940-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX940-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX940-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX940-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX940: atomicrmw.end: +; GFX940-NEXT: ret float [[TMP6]] +; +; GFX1100-LABEL: @no_unsafe( +; GFX1100-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX1100-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX1100: atomicrmw.start: +; GFX1100-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX1100-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX1100-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX1100-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX1100-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX1100-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX1100-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX1100-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX1100-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX1100-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX1100: atomicrmw.end: +; GFX1100-NEXT: ret float [[TMP6]] +; +; GFX11-LABEL: @no_unsafe( +; GFX11-NEXT: [[TMP1:%.*]] = load float, float* [[ADDR:%.*]], align 4 +; GFX11-NEXT: br label [[ATOMICRMW_START:%.*]] +; GFX11: atomicrmw.start: +; GFX11-NEXT: [[LOADED:%.*]] = phi float [ 
[[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; GFX11-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]] +; GFX11-NEXT: [[TMP2:%.*]] = bitcast float* [[ADDR]] to i32* +; GFX11-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 +; GFX11-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 +; GFX11-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("workgroup") seq_cst seq_cst, align 4 +; GFX11-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; GFX11-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; GFX11-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float +; GFX11-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX11: atomicrmw.end: +; GFX11-NEXT: ret float [[TMP6]] + %res = atomicrmw fadd float* %addr, float %val syncscope("workgroup") seq_cst + ret float %res +} + +attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll index 19ce5effd76c6..99bdde2a26301 100644 --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll @@ -263,21 +263,33 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(float* %ptr, float %value) #0 ; GFX908-NEXT: ret float [[TMP6]] ; ; GFX90A-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( -; GFX90A-NEXT: [[TMP1:%.*]] = load float, float* [[PTR:%.*]], align 4 -; GFX90A-NEXT: br label [[ATOMICRMW_START:%.*]] -; GFX90A: atomicrmw.start: -; GFX90A-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] -; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]] -; GFX90A-NEXT: [[TMP2:%.*]] = bitcast float* [[PTR]] to i32* -; GFX90A-NEXT: [[TMP3:%.*]] = bitcast float [[NEW]] to i32 -; GFX90A-NEXT: [[TMP4:%.*]] = bitcast float [[LOADED]] to i32 -; 
GFX90A-NEXT: [[TMP5:%.*]] = cmpxchg i32* [[TMP2]], i32 [[TMP4]], i32 [[TMP3]] syncscope("wavefront") monotonic monotonic, align 4 -; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 -; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 -; GFX90A-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to float -; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; GFX90A-NEXT: [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i8* +; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]] +; GFX90A: atomicrmw.check.shared: +; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]] +; GFX90A: atomicrmw.shared: +; GFX90A-NEXT: [[TMP2:%.*]] = addrspacecast float* [[PTR]] to float addrspace(3)* +; GFX90A-NEXT: [[TMP3:%.*]] = atomicrmw fadd float addrspace(3)* [[TMP2]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]] +; GFX90A: atomicrmw.check.private: +; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(i8* [[TMP1]]) +; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]] +; GFX90A: atomicrmw.private: +; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast float* [[PTR]] to float addrspace(5)* +; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]] +; GFX90A-NEXT: store float [[VAL_NEW]], float addrspace(5)* [[TMP4]], align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: atomicrmw.global: +; GFX90A-NEXT: [[TMP5:%.*]] = addrspacecast float* [[PTR]] to float addrspace(1)* +; GFX90A-NEXT: [[TMP6:%.*]] = atomicrmw fadd float addrspace(1)* [[TMP5]], float [[VALUE]] syncscope("wavefront") monotonic, align 4 +; GFX90A-NEXT: br label [[ATOMICRMW_PHI]] +; GFX90A: 
atomicrmw.phi: +; GFX90A-NEXT: [[LOADED_PHI:%.*]] = phi float [ [[TMP3]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP6]], [[ATOMICRMW_GLOBAL]] ] +; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]] ; GFX90A: atomicrmw.end: -; GFX90A-NEXT: ret float [[TMP6]] +; GFX90A-NEXT: ret float [[LOADED_PHI]] ; ; GFX940-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe( ; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd float* [[PTR:%.*]], float [[VALUE:%.*]] syncscope("wavefront") monotonic, align 4 @@ -912,6 +924,18 @@ define half @test_atomicrmw_fadd_f16_global_align4(half addrspace(1)* %ptr, half ; GFX908-LABEL: @test_atomicrmw_fadd_f16_global_align4( ; GFX908-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 4 ; GFX908-NEXT: ret half [[RES]] +; +; GFX90A-LABEL: @test_atomicrmw_fadd_f16_global_align4( +; GFX90A-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 4 +; GFX90A-NEXT: ret half [[RES]] +; +; GFX940-LABEL: @test_atomicrmw_fadd_f16_global_align4( +; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 4 +; GFX940-NEXT: ret half [[RES]] +; +; GFX11-LABEL: @test_atomicrmw_fadd_f16_global_align4( +; GFX11-NEXT: [[RES:%.*]] = atomicrmw fadd half addrspace(1)* [[PTR:%.*]], half [[VALUE:%.*]] seq_cst, align 4 +; GFX11-NEXT: ret half [[RES]] ; %res = atomicrmw fadd half addrspace(1)* %ptr, half %value seq_cst, align 4 ret half %res diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll index d7f1dd125db4e..e1941d2aa75af 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal 
-attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define internal i32 @deref(i32* %x) nounwind { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@deref ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -18,7 +18,7 @@ entry: } define i32 @f(i32 %x) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f ; TUNIT-SAME: (i32 returned [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -26,7 +26,7 @@ define i32 @f(i32 %x) { ; TUNIT-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 ; TUNIT-NEXT: ret i32 [[X]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f ; CGSCC-SAME: (i32 [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -42,9 +42,9 @@ entry: ret i32 %tmp1 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll index 7d374bfe835a1..553ba1c633de8 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-07-02-array-indexing.ll @@ -7,7 +7,7 @@ ; because there is a load of %A in the entry block define internal i32 @callee(i1 %C, i32* %A) { ; -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@callee ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -36,13 +36,13 @@ F: } define i32 @foo(i32* %A) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (i32* nocapture nofree readonly [[A:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[X:%.*]] = call i32 @callee(i32* nocapture nofree readonly align 4 [[A]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 [[X]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @callee(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]]) #[[ATTR2:[0-9]+]] @@ -53,10 +53,10 @@ define i32 @foo(i32* %A) { } ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll index d4f6c99543433..87f38af7f8a8d 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-07-CGUpdate.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define internal fastcc i32 @hash(i32* %ts, i32 %mod) nounwind { -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@hash ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -14,13 +14,13 @@ entry: } define void @encode(i32* %m, i32* %ts, i32* %new) nounwind { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind 
willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@encode ; TUNIT-SAME: (i32* nocapture nofree readnone [[M:%.*]], i32* nocapture nofree readnone [[TS:%.*]], i32* nocapture nofree readnone [[NEW:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@encode ; CGSCC-SAME: (i32* nocapture nofree readnone [[M:%.*]], i32* nocapture nofree readnone [[TS:%.*]], i32* nocapture nofree readnone [[NEW:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -31,8 +31,8 @@ entry: unreachable } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree noreturn nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree noreturn nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll index f83383ffb2943..be4d00168b773 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define internal fastcc i32 @term_SharingList(i32* %Term, i32* %List) nounwind { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@term_SharingList ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -25,7 +25,7 @@ bb5: ; preds = %entry } define i32 @term_Sharing(i32* %Term) nounwind { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@term_Sharing ; CHECK-SAME: (i32* nocapture nofree readnone [[TERM:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -46,5 +46,5 @@ bb14: ; preds = %entry ret i32 0 } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll index 9c4839bd22293..59c3fcecb7bcf 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2 ; CHECK-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: @@ -22,7 +22,7 @@ bb: } define void @no_promote(<4 x i64>* %arg) #1 { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@no_promote ; TUNIT-SAME: (<4 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: bb: @@ -35,7 +35,7 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; TUNIT-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@no_promote ; CGSCC-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -60,7 +60,7 @@ bb: } define internal fastcc 
void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@promote_avx2 ; CHECK-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: bb: @@ -77,7 +77,7 @@ bb: } define void @promote(<4 x i64>* %arg) #0 { -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@promote ; TUNIT-SAME: (<4 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: bb: @@ -91,7 +91,7 @@ define void @promote(<4 x i64>* %arg) #0 { ; TUNIT-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@promote ; CGSCC-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(32) [[ARG:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: bb: @@ -123,15 +123,15 @@ attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2 attributes #1 = { nounwind uwtable } attributes #2 = { argmemonly nounwind } ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "target-features"="+avx2" } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn uwtable } -; TUNIT: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR3]] = { willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "target-features"="+avx2" } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR3]] = { willreturn } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "target-features"="+avx2" } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR3]] = { willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "target-features"="+avx2" } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR3]] = { willreturn } ; CGSCC: attributes #[[ATTR4]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 5cf7340a2f78b..660deb3cd8015 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu" ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: @@ -27,7 +27,7 @@ bb: define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: bb: @@ -41,7 +41,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn 
memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: bb: @@ -69,7 +69,7 @@ bb: ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: bb: @@ -87,7 +87,7 @@ bb: define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: bb: @@ -101,7 +101,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture 
nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: bb: @@ -129,7 +129,7 @@ bb: ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: bb: @@ -147,7 +147,7 @@ bb: define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: bb: @@ -161,7 +161,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: bb: @@ -189,7 +189,7 @@ bb: ; This should promote define internal 
fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: bb: @@ -207,7 +207,7 @@ bb: define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: bb: @@ -221,7 +221,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: bb: @@ -249,7 +249,7 @@ bb: ; This should not promote define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { ; -; CHECK: Function Attrs: 
argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: bb: @@ -265,7 +265,7 @@ bb: define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: bb: @@ -278,7 +278,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -305,7 +305,7 @@ bb: ; This should not promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn 
uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: bb: @@ -321,7 +321,7 @@ bb: define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: bb: @@ -334,7 +334,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: bb: @@ -361,7 +361,7 @@ bb: ; This should promote define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind 
willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 ; CHECK-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: bb: @@ -379,7 +379,7 @@ bb: define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: bb: @@ -393,7 +393,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: bb: @@ -421,7 +421,7 @@ bb: ; This should promote define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { ; -; CHECK: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 ; CHECK-SAME: (<8 x i64>* noalias 
nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: bb: @@ -439,7 +439,7 @@ bb: define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 { ; -; TUNIT: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 ; TUNIT-SAME: (<8 x i64>* nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: bb: @@ -453,7 +453,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; TUNIT-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256 ; CGSCC-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: bb: @@ -488,19 +488,19 @@ attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2 attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" } attributes #5 = { argmemonly nounwind } ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } -; TUNIT: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; TUNIT: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; TUNIT: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } -; TUNIT: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR5]] = { willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; TUNIT: attributes #[[ATTR1]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; TUNIT: attributes #[[ATTR2]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; TUNIT: attributes #[[ATTR3]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR5]] = { willreturn } ; TUNIT: attributes 
#[[ATTR6]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR1]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR2]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR3]] = { argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } -; CGSCC: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR5]] = { willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR1]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR2]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR3]] = { inlinehint nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CGSCC: 
attributes #[[ATTR5]] = { willreturn } ; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll index d08c230e79607..84aac94f429af 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/aggregate-promote.ll @@ -9,7 +9,7 @@ ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = constant [[T:%.*]] { i32 0, i32 0, i32 17, i32 25 } ;. define internal i32 @test(%T* %p) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -26,13 +26,13 @@ entry: define i32 @caller() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 42 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -44,9 +44,9 @@ entry: ret i32 %v } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll index 0d3e26e4b3aeb..31fc10745c1c6 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll @@ -40,7 +40,7 @@ declare void @z(i32) ; Test2 ; Different alignemnt privatizable arguments define internal i32 @test(i32* %X, i64* %Y) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[Y_PRIV:%.*]] = alloca i64, align 8 @@ -69,7 +69,7 @@ Return2: } define internal i32 @caller(i32* %A) { -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[A_PRIV:%.*]] = alloca i32, align 4 @@ -84,13 +84,13 @@ define internal i32 @caller(i32* %A) { } define i32 @callercaller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callercaller ; 
TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 3 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callercaller ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @caller(i32 noundef 2) #[[ATTR4:[0-9]+]] @@ -102,11 +102,11 @@ define i32 @callercaller() { ret i32 %X } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { willreturn memory(read) } ; CGSCC: attributes #[[ATTR4]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll index 26d738a59469f..13c4ecba9cc66 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll @@ -7,7 +7,7 @@ ; Don't drop 'byval' on %X here. 
define internal i32 @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X, i32 %i) nounwind { ; -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -43,7 +43,7 @@ entry: ; Also make sure we don't drop the call zeroext attribute. define i32 @test(i32* %X) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test ; TUNIT-SAME: (i32* nocapture nofree readonly [[X:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -59,7 +59,7 @@ define i32 @test(i32* %X) { ; TUNIT-NEXT: [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 [[C]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -82,10 +82,10 @@ entry: ret i32 %c } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll index 457e53f1ab45d..e8aea37d37f95 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll @@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @test(i32* %X, i32* %Y) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32 [[TMP0:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[X_PRIV:%.*]] = alloca i32, align 4 @@ -21,7 +21,7 @@ define internal i32 @test(i32* %X, i32* %Y) { } define internal i32 @caller(i32* %B) { -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[B_PRIV:%.*]] = alloca i32, align 4 @@ -36,13 +36,13 @@ define internal i32 @caller(i32* %B) { } define i32 @callercaller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callercaller ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 3 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callercaller ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @caller(i32 noundef 2) #[[ATTR4:[0-9]+]] @@ -55,11 +55,11 @@ define i32 @callercaller() { } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { willreturn memory(read) } ; CGSCC: attributes #[[ATTR4]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll index 3e15ba83fef80..e6b145c5f3902 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -5,7 +5,7 @@ %struct.ss = type { i32, i64 } define internal void @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X) nounwind { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -35,7 +35,7 @@ entry: define i32 @test(i32* %X) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test ; TUNIT-SAME: (i32* nocapture nofree readonly [[X:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -51,7 +51,7 @@ define i32 @test(i32* %X) { ; TUNIT-NEXT: call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -72,10 +72,10 @@ entry: ret i32 0 } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll index c14aacf6011a2..e27904ebd3113 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -7,7 +7,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 %struct.ss = type { i32, i64 } define internal i32 @f(%struct.ss* byval(%struct.ss) %b) nounwind { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -32,7 +32,7 @@ entry: define internal i32 @g(%struct.ss* byval(%struct.ss) align 32 %b) nounwind { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@g ; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -57,7 +57,7 @@ entry: define i32 @main() nounwind { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -65,20 +65,20 @@ define i32 @main() 
nounwind { ; TUNIT-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 ; TUNIT-NEXT: store i32 1, i32* [[TMP1]], align 8 ; TUNIT-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; TUNIT-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* -; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST1]], align 8 -; TUNIT-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 1 -; TUNIT-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 8 -; TUNIT-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* -; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 32 +; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8 ; TUNIT-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 1 -; TUNIT-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 32 +; TUNIT-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8 +; TUNIT-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR2:[0-9]+]] +; TUNIT-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* +; TUNIT-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32 +; TUNIT-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i64 0, i32 1 +; TUNIT-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32 ; TUNIT-NEXT: [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) #[[ATTR2]] ; TUNIT-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -104,11 +104,11 @@ entry: ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll index 0ed328d0a6cf5..2a6d3dc3378a1 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/chained.ll @@ -11,7 +11,7 @@ ;. 
define internal i32 @test(i32** %x) { ; -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -25,14 +25,14 @@ entry: } define i32 @caller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[X:%.*]] = call i32 @test() #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 [[X]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -45,10 +45,10 @@ entry: } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll index bf8441ebfec2c..44ce05fd82363 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow.ll @@ -4,7 +4,7 @@ ; Don't promote around control flow. define internal i32 @callee(i1 %C, i32* %P) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@callee ; CHECK-SAME: (i1 [[C:%.*]], i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -27,14 +27,14 @@ F: } define i32 @foo(i1 %C, i32* %P) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[X:%.*]] = call i32 @callee(i1 [[C]], i32* nocapture nofree readonly [[P]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret i32 [[X]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -47,10 +47,10 @@ entry: } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll index ab2cb25792f39..0386d1fe9808f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll @@ -5,7 +5,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define internal i32 @callee(i1 %C, i32* %P) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@callee ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[P_PRIV:%.*]] = alloca i32, align 4 @@ -28,13 +28,13 @@ F: ; preds = %0 } define i32 @foo() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 17 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @callee(i32 noundef 17) 
#[[ATTR2:[0-9]+]] @@ -47,9 +47,9 @@ define i32 @foo() { } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll index c26c758e66060..7afc19674ae34 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll @@ -7,7 +7,7 @@ ; Inlining should nuke the invoke (and any inlined calls) here even with ; argument promotion running along with it. 
define void @zot() personality i32 (...)* @wibble { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@zot ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] personality i32 (...)* @wibble { ; TUNIT-NEXT: bb: @@ -18,7 +18,7 @@ define void @zot() personality i32 (...)* @wibble { ; TUNIT: bb2: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@zot ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] personality i32 (...)* @wibble { ; CGSCC-NEXT: bb: @@ -43,13 +43,13 @@ bb2: } define internal void @hoge() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@hoge ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@hoge ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: bb: @@ -62,7 +62,7 @@ bb: } define internal fastcc i8* @spam(i1 (i8*)* %arg) { -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@spam ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -85,7 +85,7 @@ bb: } define internal i1 @barney(i8* %arg) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@barney ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; 
CGSCC-NEXT: bb: @@ -96,13 +96,13 @@ bb: } define i32 @test_inf_promote_caller(i32 %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_inf_promote_caller ; TUNIT-SAME: (i32 [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_caller ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -119,7 +119,7 @@ bb: } define internal i32 @test_inf_promote_callee(%S* %arg, %S* %arg1) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_callee ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: bb: @@ -137,13 +137,13 @@ bb: declare i32 @wibble(...) ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { noreturn nounwind readnone } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { noreturn nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR4]] = { noreturn nounwind readnone } +; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR4]] = { noreturn nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll index ad98b8a4eb562..bb62017e2aa34 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll @@ -19,13 +19,13 @@ target triple = "x86_64-unknown-linux-gnu" ;. 
define void @run() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@run ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@run ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -40,7 +40,7 @@ entry: } define internal i8 @UseLongDoubleUnsafely(%union.u* byval(%union.u) align 16 %arg) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@UseLongDoubleUnsafely ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -54,7 +54,7 @@ entry: } define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 %arg) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@UseLongDoubleSafely ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret x86_fp80 undef @@ -65,7 +65,7 @@ define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval(%union.u) align 16 } define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval(%struct.Foo) %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@AccessPaddingOfStruct ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i64 undef @@ -76,7 +76,7 @@ define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval(%struct.Foo) %a) { } define internal i64 @CaptureAStruct(%struct.Foo* byval(%struct.Foo) %a) { -; CGSCC: Function Attrs: nofree norecurse 
noreturn nosync nounwind readnone +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@CaptureAStruct ; CGSCC-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -104,9 +104,9 @@ loop: br label %loop } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind readnone } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll index 30ecb4450596c..01f537744a452 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -8,7 +8,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 ; Argpromote + sroa should change this to passing the two integers by value. 
define internal i32 @f(%struct.ss* inalloca(%struct.ss) %s) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@f ; CHECK-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull inalloca([[STRUCT_SS:%.*]]) align 4 dereferenceable(8) [[S:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -29,7 +29,7 @@ entry: } define i32 @main() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -41,7 +41,7 @@ define i32 @main() { ; TUNIT-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull inalloca([[STRUCT_SS]]) align 4 dereferenceable(8) [[S]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret i32 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -65,7 +65,7 @@ entry: ; Argpromote can't promote %a because of the icmp use. 
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca(%struct.ss) %b) nounwind { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@g ; CGSCC-SAME: (%struct.ss* noalias nocapture nofree nonnull readnone align 4 dereferenceable(8) [[A:%.*]], %struct.ss* noalias nocapture nofree nonnull writeonly inalloca([[STRUCT_SS:%.*]]) align 4 dereferenceable(8) [[B:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -77,13 +77,13 @@ entry: } define i32 @test() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -95,12 +95,12 @@ entry: ret i32 0 } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll index c23760a146f36..4446607a08fae 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/invalidation.ll @@ -21,7 +21,7 @@ entry: } define i32 @b() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@b ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -33,7 +33,7 @@ entry: } define i32 @c() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@c ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -46,5 +46,5 @@ entry: ret i32 %result } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index 8d383f8c9f94c..8b878da52962b 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -13,7 +13,7 @@ define internal void @dead() { } define internal i32 @test(i32* %X, i32* %Y) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@test ; CGSCC-SAME: (i32* noalias nocapture nofree noundef writeonly align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] @@ -34,7 +34,7 @@ dead: } define internal i32 @caller(i32* %B) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -48,13 +48,13 @@ define internal i32 @caller(i32* %B) { } define i32 @callercaller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callercaller ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callercaller ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -68,11 +68,11 @@ define i32 @callercaller() { } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR4]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR4]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index a2f862cf2cf0a..c65dfa45b305e 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -13,7 +13,7 @@ define internal void @dead() { } define internal i32 @test(i32* %X, i32* %Y) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-SAME: (i32* noalias nocapture nofree noundef writeonly align 4 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] @@ -34,14 +34,14 @@ dead: } define internal i32 @caller(i32* %B) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret i32 undef ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -55,14 +55,14 @@ define internal i32 @caller(i32* %B) { } define i32 @callercaller() { -; TUNIT: Function 
Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callercaller ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2]] ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callercaller ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -77,12 +77,12 @@ define i32 @callercaller() { } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR3]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR3]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll index e913052a81279..a23e39156e7c8 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll @@ -8,7 +8,7 @@ %T = type { i32, i32, i32, i32 } define internal i32 @test(%T* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-SAME: (%T* nocapture nofree readonly [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 @@ -27,13 +27,13 @@ define internal i32 @test(%T* %p) { } define i32 @caller(%T* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (%T* nocapture nofree readonly [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[V:%.*]] = musttail call i32 @test(%T* nocapture nofree readonly [[P]]) #[[ATTR4:[0-9]+]] ; TUNIT-NEXT: ret i32 [[V]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (%T* nocapture nofree readonly [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[V:%.*]] = musttail call i32 @test(%T* nocapture nofree readonly [[P]]) #[[ATTR5:[0-9]+]] @@ -46,12 +46,12 @@ define i32 @caller(%T* %p) { ; Don't promote arguments of musttail caller define i32 @foo(%T* %p, i32 %v) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; 
TUNIT-SAME: (%T* nocapture nofree readnone [[P:%.*]], i32 [[V:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (%T* nocapture nofree readnone [[P:%.*]], i32 [[V:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: ret i32 0 @@ -60,7 +60,7 @@ define i32 @foo(%T* %p, i32 %v) { } define internal i32 @test2(%T* %p, i32 %p2) { -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test2 ; CGSCC-SAME: (%T* nocapture nofree readonly [[P:%.*]], i32 [[P2:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 @@ -68,7 +68,7 @@ define internal i32 @test2(%T* %p, i32 %p2) { ; CGSCC-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4 ; CGSCC-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4 ; CGSCC-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] -; CGSCC-NEXT: [[CA:%.*]] = musttail call noundef i32 @foo(%T* undef, i32 [[V]]) #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: [[CA:%.*]] = musttail call noundef i32 @foo(%T* undef, i32 [[V]]) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[CA]] ; %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 @@ -81,12 +81,12 @@ define internal i32 @test2(%T* %p, i32 %p2) { } define i32 @caller2(%T* %g) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller2 ; TUNIT-SAME: (%T* nocapture nofree readnone [[G:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@caller2 ; 
CGSCC-SAME: (%T* nocapture nofree readonly align 4 [[G:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[V:%.*]] = call noundef i32 @test2(%T* nocapture nofree readonly [[G]], i32 noundef 0) #[[ATTR5]] @@ -101,14 +101,14 @@ define i32 @caller2(%T* %g) { ; is kept as well. define i32 @bar(%T* %p, i32 %v) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@bar ; TUNIT-SAME: (%T* nocapture nofree nonnull writeonly dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: [[I32PTR:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 0 ; TUNIT-NEXT: store i32 [[V]], i32* [[I32PTR]], align 4 ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (%T* nocapture nofree nonnull writeonly dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: [[I32PTR:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 0 @@ -121,7 +121,7 @@ define i32 @bar(%T* %p, i32 %v) { } define internal i32 @test2b(%T* %p, i32 %p2) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test2b ; TUNIT-SAME: (%T* nocapture nofree readonly [[P:%.*]], i32 [[P2:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 @@ -129,10 +129,10 @@ define internal i32 @test2b(%T* %p, i32 %p2) { ; TUNIT-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4 ; TUNIT-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4 ; TUNIT-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] -; TUNIT-NEXT: [[CA:%.*]] = musttail call noundef i32 @bar(%T* 
undef, i32 [[V]]) #[[ATTR5:[0-9]+]] +; TUNIT-NEXT: [[CA:%.*]] = musttail call noundef i32 @bar(%T* undef, i32 [[V]]) #[[ATTR4]] ; TUNIT-NEXT: ret i32 [[CA]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test2b ; CGSCC-SAME: (%T* nocapture nofree readonly [[P:%.*]], i32 [[P2:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: [[A_GEP:%.*]] = getelementptr [[T:%.*]], %T* [[P]], i64 0, i32 3 @@ -140,7 +140,7 @@ define internal i32 @test2b(%T* %p, i32 %p2) { ; CGSCC-NEXT: [[A:%.*]] = load i32, i32* [[A_GEP]], align 4 ; CGSCC-NEXT: [[B:%.*]] = load i32, i32* [[B_GEP]], align 4 ; CGSCC-NEXT: [[V:%.*]] = add i32 [[A]], [[B]] -; CGSCC-NEXT: [[CA:%.*]] = musttail call noundef i32 @bar(%T* undef, i32 [[V]]) #[[ATTR7:[0-9]+]] +; CGSCC-NEXT: [[CA:%.*]] = musttail call noundef i32 @bar(%T* undef, i32 [[V]]) #[[ATTR6:[0-9]+]] ; CGSCC-NEXT: ret i32 [[CA]] ; %a.gep = getelementptr %T, %T* %p, i64 0, i32 3 @@ -153,37 +153,34 @@ define internal i32 @test2b(%T* %p, i32 %p2) { } define i32 @caller2b(%T* %g) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@caller2b ; TUNIT-SAME: (%T* nocapture nofree readonly [[G:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: [[V:%.*]] = call noundef i32 @test2b(%T* nocapture nofree readonly [[G]], i32 undef) #[[ATTR6:[0-9]+]] +; TUNIT-NEXT: [[V:%.*]] = call noundef i32 @test2b(%T* nocapture nofree readonly [[G]], i32 undef) #[[ATTR4]] ; TUNIT-NEXT: ret i32 [[V]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@caller2b ; CGSCC-SAME: (%T* nocapture nofree readonly align 4 [[G:%.*]]) #[[ATTR4]] { -; CGSCC-NEXT: [[V:%.*]] = call noundef 
i32 @test2b(%T* nocapture nofree readonly [[G]], i32 noundef 0) #[[ATTR8:[0-9]+]] +; CGSCC-NEXT: [[V:%.*]] = call noundef i32 @test2b(%T* nocapture nofree readonly [[G]], i32 noundef 0) #[[ATTR7:[0-9]+]] ; CGSCC-NEXT: ret i32 [[V]] ; %v = call i32 @test2b(%T* %g, i32 0) ret i32 %v } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR5]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR6]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR8]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5]] = { willreturn } +; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll index c7182fbd3f5d6..bc5d660beb7f3 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -12,7 +12,7 @@ %fun_t = type void (%p_t)* define void @foo() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP:%.*]] = alloca void (i16*)*, align 8 @@ -24,7 +24,7 @@ define void @foo() { } define internal void @bar(%p_t %p) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (i16* nocapture nofree readnone [[P:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: call void @llvm.dbg.value(metadata i16* [[P]], metadata [[META3:![0-9]+]], metadata !DIExpression()) #[[ATTR2:[0-9]+]], !dbg [[DBG5:![0-9]+]] @@ -47,12 +47,12 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !5 = !DIExpression() !6 = !DILocation(line: 1, column: 1, scope: !3) ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. ; TUNIT: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) ; TUNIT: [[META1:![0-9]+]] = !DIFile(filename: "test.c", directory: "") diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll index b9e8031e5b7b2..1ddebf1fe47a2 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll @@ -7,13 +7,13 @@ target triple = "x86_64-pc-windows-msvc" define internal void @add({i32, i32}* %this, i32* sret(i32) %r) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@add ; TUNIT-SAME: ({ i32, i32 }* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: store i32 undef, i32* [[R]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@add ; CGSCC-SAME: ({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 
8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0 @@ -34,14 +34,14 @@ define internal void @add({i32, i32}* %this, i32* sret(i32) %r) { } define void @f() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: [[R:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef, i32* noalias nocapture nofree noundef nonnull writeonly sret(i32) align 4 dereferenceable(4) [[R]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[R:%.*]] = alloca i32, align 4 @@ -56,11 +56,11 @@ define void @f() { ret void } ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll index dfa45b12b1d50..8992fb1e991a3 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll @@ -45,7 +45,7 @@ return: ; preds = %entry } define internal i32 @vfu2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwind readonly { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@vfu2 ; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -86,7 +86,7 @@ define i32 @unions() nounwind { ; TUNIT-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 ; TUNIT-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i64 0, i32 1 ; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 -; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR0]] ; TUNIT-NEXT: ret i32 [[RESULT]] ; ; CGSCC: Function Attrs: nounwind @@ -110,7 +110,7 @@ entry: } define internal i32 @vfu2_v2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwind readonly { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function 
Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@vfu2_v2 ; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -155,7 +155,7 @@ define i32 @unions_v2() nounwind { ; TUNIT-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 ; TUNIT-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i64 0, i32 1 ; TUNIT-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 -; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2]] +; TUNIT-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR0]] ; TUNIT-NEXT: ret i32 [[RESULT]] ; ; CGSCC: Function Attrs: nounwind @@ -172,10 +172,6 @@ entry: ret i32 %result } ;. -; TUNIT: attributes #[[ATTR0]] = { nounwind } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR2]] = { nounwind readonly } -;. -; CGSCC: attributes #[[ATTR0]] = { nounwind } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } +; CHECK: attributes #[[ATTR0]] = { nounwind } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll index 9e8e666991c9a..206db6d74e564 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll @@ -7,13 +7,13 @@ target triple = "x86_64-unknown-linux-gnu" define i64 @fn2() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@fn2 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i64 undef ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn2 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -31,7 +31,7 @@ entry: define i64 @fn2b(i32 %arg) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@fn2b ; TUNIT-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -39,7 +39,7 @@ define i64 @fn2b(i32 %arg) { ; TUNIT-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] ; TUNIT-NEXT: ret i64 [[DIV]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn2b ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -56,13 +56,13 @@ entry: } define i64 @fn2c() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@fn2c ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i64 42 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; 
CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn2c ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -79,7 +79,7 @@ entry: } define internal i64 @fn1(i64 %p1) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn1 ; CGSCC-SAME: (i64 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -91,9 +91,9 @@ entry: ret i64 %cond } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll index a2627ecd50542..b65be6a5d511d 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll @@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @fn2(i32* %P, i1 %C) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@fn2 ; TUNIT-SAME: (i32* nocapture nofree [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -21,7 +21,7 @@ define void @fn2(i32* %P, i1 %C) { ; TUNIT: exit: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@fn2 ; CGSCC-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -54,7 +54,7 @@ exit: } define internal i32 @fn1(i32 %p1) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn1 ; CGSCC-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -116,7 +116,7 @@ exit: } define internal i32 @fn0(i32 %p1) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fn0 ; CGSCC-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -128,10 +128,10 @@ entry: ret i32 %cond } ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind null_pointer_is_valid } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll index 97168799d9814..79a6774836475 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR43857.ll @@ -8,7 +8,7 @@ declare dso_local fastcc float @bar(%struct.wobble* noalias, <8 x i32>) unnamed_addr define %struct.zot @widget(<8 x i32> %arg) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@widget ; CHECK-SAME: (<8 x i32> [[ARG:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: bb: @@ -19,14 +19,14 @@ bb: } define void @baz(<8 x i32> %arg) local_unnamed_addr { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@baz ; TUNIT-SAME: (<8 x i32> [[ARG:%.*]]) local_unnamed_addr #[[ATTR0]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_ZOT:%.*]] undef, 0, 0 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn 
+; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@baz ; CGSCC-SAME: (<8 x i32> [[ARG:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -38,8 +38,8 @@ bb: ret void } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll index 1222ca360a3e3..8ddb79b4517e5 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll @@ -40,7 +40,7 @@ define dso_local i16 @foo(i16 %a) { ; TUNIT-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16)*)(i16 [[A]]) ; TUNIT-NEXT: ret i16 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i16 [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16)*)(i16 [[A]]) @@ -51,7 +51,7 @@ define dso_local i16 @foo(i16 %a) { } define internal i16 @bar(i16 %p1, i16 %p2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (i16 [[P1:%.*]], i16 [[P2:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i16 0 @@ -66,7 
+66,7 @@ define dso_local i16 @foo2(i16 %a) { ; TUNIT-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar2 to i16 (i16)*)(i16 [[A]]) ; TUNIT-NEXT: ret i16 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@foo2 ; CGSCC-SAME: (i16 [[A:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar2 to i16 (i16)*)(i16 [[A]]) @@ -77,7 +77,7 @@ define dso_local i16 @foo2(i16 %a) { } define internal i16 @bar2(i16 %p1, i16 %p2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar2 ; CHECK-SAME: (i16 [[P1:%.*]], i16 [[P2:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[A:%.*]] = add i16 [[P1]], [[P2]] @@ -101,7 +101,7 @@ define dso_local i16 @vararg_tests(i16 %a) { ; TUNIT-NEXT: [[ADD:%.*]] = add i16 7, [[CALL2]] ; TUNIT-NEXT: ret i16 [[ADD]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@vararg_tests ; CGSCC-SAME: (i16 [[A:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CALL1:%.*]] = call i16 (i16, ...) @vararg_prop(i16 noundef 7, i16 noundef 8, i16 [[A]]) #[[ATTR2:[0-9]+]] @@ -116,7 +116,7 @@ define dso_local i16 @vararg_tests(i16 %a) { } define internal i16 @vararg_prop(i16 %p1, ...) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@vararg_prop ; CGSCC-SAME: (i16 [[P1:%.*]], ...) #[[ATTR1]] { ; CGSCC-NEXT: ret i16 7 @@ -125,7 +125,7 @@ define internal i16 @vararg_prop(i16 %p1, ...) { } define internal i16 @vararg_no_prop(i16 %p1, i16 %p2, ...) 
{ -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@vararg_no_prop ; CHECK-SAME: (i16 [[P1:%.*]], i16 [[P2:%.*]], ...) #[[ATTR1]] { ; CHECK-NEXT: ret i16 7 @@ -135,9 +135,9 @@ define internal i16 @vararg_no_prop(i16 %p1, i16 %p2, ...) { ;. ; TUNIT: attributes #[[ATTR0]] = { norecurse } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll index 701aa37bf66eb..7b89d2cccf6b2 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-type-mismatch.ll @@ -12,7 +12,7 @@ define dso_local i16 @foo(i16 %a) { ; TUNIT-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7) ; TUNIT-NEXT: ret i16 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i16 [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i16 bitcast (i16 (i16, i16)* @bar to i16 (i16, i32)*)(i16 [[A]], i32 7) @@ -23,7 +23,7 @@ define dso_local i16 @foo(i16 %a) { } define internal i16 @bar(i16 %p1, i16 %p2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (i16 [[P1:%.*]], i16 returned [[P2:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i16 [[P2]] @@ -34,8 +34,8 @@ define internal i16 @bar(i16 %p1, i16 %p2) { ;. ; TUNIT: attributes #[[ATTR0]] = { norecurse } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll b/llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll index 4d6ce9cb836c3..34141fc57e1bb 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/comdat-ipo.ll @@ -5,7 +5,7 @@ ; See PR26774 define i32 @baz() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@baz ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 10 @@ -45,8 +45,8 @@ define i32 @bar() { ret i32 %val } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR1]] = { norecurse } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll b/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll index 43b30e6bc7de2..10c4d87f53491 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll @@ -17,7 +17,7 @@ ; CGSCC: @[[BAR_L:[a-zA-Z0-9_$"\\.-]+]] = internal constant [2 x i8*] [i8* blockaddress(@bar, [[LAB0:%.*]]), i8* blockaddress(@bar, [[END:%.*]])] ;. 
define internal void @foo(i32 %x) nounwind readnone { -; CGSCC: Function Attrs: nounwind readnone +; CGSCC: Function Attrs: nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -32,7 +32,7 @@ entry: } define internal void @bar(i32* nocapture %pc) nounwind readonly { -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (i32* nocapture [[PC:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -70,13 +70,13 @@ indirectgoto: ; preds = %lab0, %entry } define i32 @main() nounwind readnone { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -86,9 +86,9 @@ entry: ret i32 0 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nounwind readnone } -; CGSCC: attributes #[[ATTR1]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nounwind memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll b/llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll index 96fb44bd6a982..ef8005392407d 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/deadarg.ll @@ -12,7 +12,7 @@ define internal void @foo(i32 %X) { } define void @bar() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret void @@ -20,5 +20,5 @@ define void @bar() { ret void } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll b/llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll index 64cbc41ff7c53..c24c9b2f3bc80 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/fp-bc-icmp-const-fold.ll @@ -5,7 +5,7 @@ target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64le-unknown-linux" define void @test(i32 signext %n) { -; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@test ; CHECK-SAME: (i32 signext [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -79,5 +79,5 @@ _ZN5boost4math4signIgEEiRKT_.exit30: ; preds = %cond.false.i28, %if } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/global.ll b/llvm/test/Transforms/Attributor/IPConstantProp/global.ll index 3c567d10d6361..0dceadd6968d0 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/global.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/global.ll @@ -8,7 +8,7 @@ ; CHECK: @[[_ZL6TEST1G:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 42, align 4 ;. define void @_Z7test1f1v() nounwind { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@_Z7test1f1v ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -32,7 +32,7 @@ if.end: ; preds = %if.then, %entry } define i32 @_Z7test1f2v() nounwind { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@_Z7test1f2v ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -43,5 +43,5 @@ entry: ret i32 %tmp } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll index 835d8df51a94b..b86c92a306cc5 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll @@ -38,7 +38,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define internal i32 @cb0(i32 %zero) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cb0 ; CHECK-SAME: (i32 [[ZERO:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -49,7 +49,7 @@ entry: } define internal i32 @cb1(i32 %unknown) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cb1 ; CHECK-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -60,13 +60,13 @@ entry: } define internal i32 @cb2(i32 %unknown) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cb2 ; TUNIT-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 [[UNKNOWN]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cb2 ; CGSCC-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -78,7 +78,7 @@ entry: } define internal i32 @cb3(i32 %unknown) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cb3 ; 
CHECK-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -89,7 +89,7 @@ entry: } define internal i32 @cb4(i32 %unknown) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cb4 ; CHECK-SAME: (i32 noundef [[UNKNOWN:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -125,10 +125,10 @@ declare !callback !3 void @broker(i32 (i32)*, i32 (i32)*, i32 (i32)*, i32, i32) !2 = !{i64 2, i64 3, i1 false} !3 = !{!0, !2, !1} ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ;. 
; CHECK: [[META0:![0-9]+]] = !{!1, !2, !3} ; CHECK: [[META1:![0-9]+]] = !{i64 0, i64 3, i1 false} diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll b/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll index 6bdd396c2a6ac..539faa222c5dd 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll @@ -78,7 +78,7 @@ define internal i8* @side_effects(i8 %v) { } define internal i8* @no_side_effects(i8 %v) readonly nounwind { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@no_side_effects ; CGSCC-SAME: (i8 [[V:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: ret i8* null @@ -96,5 +96,5 @@ define internal i8* @dont_zap_me(i8 %v) { ret i8* null } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll index 05f86d3d1b709..451ccd13d5b65 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -68,7 +68,7 @@ entry: declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*) define internal i8* @foo(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i8* noalias nocapture nofree readnone align 4294967296 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -79,7 +79,7 @@ entry: } define internal i8* @bar(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -90,7 +90,7 @@ entry: } define internal i8* @baz(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@baz ; CHECK-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -101,7 +101,7 @@ entry: } define internal i8* @buz(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@buz ; CHECK-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(1) 
"no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -114,7 +114,7 @@ entry: !1 = !{i64 2, i64 3, i1 false} !0 = !{!1} ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. ; CHECK: [[META0:![0-9]+]] = !{!1} ; CHECK: [[META1:![0-9]+]] = !{i64 2, i64 3, i1 false} diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll b/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll index d268731f8ddb2..ae2060ac876e0 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/recursion.ll @@ -5,7 +5,7 @@ ; CHECK-NOT: %X define internal i32 @foo(i32 %X) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: ret i32 undef @@ -16,12 +16,12 @@ define internal i32 @foo(i32 %X) { } define void @bar() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@bar ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret void @@ -31,7 +31,7 @@ define void @bar() { } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll b/llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll index 07c1c4f0296d1..ed8ebd2f02901 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/remove-call-inst.ll @@ -9,13 +9,13 @@ ; FIXME: Remove obsolete calls/instructions define i32 @main() noreturn nounwind { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32 123 ; -; CGSCC: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -28,7 +28,7 @@ entry: } define internal i32 @wwrite(i64 %i) nounwind readnone { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@wwrite ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -54,9 +54,9 @@ return: ret i32 0 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll index 96772b1681ba9..af767ba061051 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll @@ -4,7 +4,7 @@ ;; This function returns its second argument on all return statements define internal i32* @incdec(i1 %C, i32* %V) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@incdec ; TUNIT-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull returned writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -13,7 +13,7 @@ define internal i32* @incdec(i1 %C, i32* %V) { ; TUNIT: F: ; TUNIT-NEXT: ret i32* [[V]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@incdec ; CGSCC-SAME: (i1 [[C:%.*]], i32* nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 @@ -44,7 +44,7 @@ F: ; preds = %0 ;; This function 
returns its first argument as a part of a multiple return ;; value define internal { i32, i32 } @foo(i32 %A, i32 %B) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = add i32 [[A]], [[B]] @@ -59,7 +59,7 @@ define internal { i32, i32 } @foo(i32 %A, i32 %B) { } define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { ; TUNIT-NEXT: [[Q:%.*]] = alloca i32, align 4 @@ -79,7 +79,7 @@ define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; CGSCC-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[Q]]) #[[ATTR3:[0-9]+]] ; CGSCC-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR4:[0-9]+]] ; CGSCC-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 -; CGSCC-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR5:[0-9]+]] +; CGSCC-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR3]] ; CGSCC-NEXT: br label [[OK:%.*]] ; CGSCC: OK: ; CGSCC-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 @@ -118,14 +118,13 @@ RET: declare i32 @__gxx_personality_v0(...) ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR3]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR5]] = { nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR4]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll index ea4bcd791ac97..604af5031c656 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-constant.ll @@ -5,7 +5,7 @@ ; FIXME: icmp folding is missing define i1 @invokecaller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@invokecaller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { ; TUNIT-NEXT: [[X:%.*]] = call i32 @foo(i1 [[C]]) #[[ATTR1:[0-9]+]] @@ -15,7 +15,7 @@ define i1 @invokecaller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; TUNIT: FAIL: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@invokecaller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { ; CGSCC-NEXT: [[X:%.*]] = call i32 @foo(i1 [[C]]) #[[ATTR2:[0-9]+]] @@ -37,7 +37,7 @@ FAIL: } define internal i32 @foo(i1 %C) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -46,7 +46,7 @@ define internal i32 @foo(i1 %C) { ; TUNIT: F: ; TUNIT-NEXT: ret i32 undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i1 [[C:%.*]]) 
#[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -65,12 +65,12 @@ F: ; preds = %0 } define i1 @caller(i1 %C) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[X:%.*]] = call i32 @foo(i1 [[C]]) #[[ATTR3:[0-9]+]] @@ -84,11 +84,11 @@ define i1 @caller(i1 %C) { declare i32 @__gxx_personality_v0(...) ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nounwind readnone } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nounwind } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR3]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll index 5bde8ff8bc66e..4327e6d0e9f42 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-constants.ll @@ -7,7 +7,7 @@ %0 = type { i32, i32 } define internal %0 @foo(i1 %Q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i1 [[Q:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: br i1 [[Q]], label [[T:%.*]], label [[F:%.*]] @@ -34,7 +34,7 @@ F: ; preds = %0 } define internal %0 @bar(i1 %Q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (i1 [[Q:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[A:%.*]] = insertvalue [[TMP0:%.*]] undef, i32 21, 0 @@ -59,13 +59,13 @@ F: ; preds = %0 } define %0 @caller(i1 %Q) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller ; TUNIT-SAME: (i1 [[Q:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[X:%.*]] = call [[TMP0:%.*]] @foo(i1 [[Q]]) #[[ATTR1:[0-9]+]] ; TUNIT-NEXT: ret [[TMP0]] [[X]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller ; CGSCC-SAME: (i1 [[Q:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[X:%.*]] = call [[TMP0:%.*]] @foo(i1 [[Q]]) #[[ATTR2:[0-9]+]] @@ -84,7 +84,7 @@ define %0 @caller(i1 %Q) { ; Similar to @caller but the result of both calls are actually used. 
define i32 @caller2(i1 %Q) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller2 ; TUNIT-SAME: (i1 [[Q:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[X:%.*]] = call [[TMP0:%.*]] @foo(i1 [[Q]]) #[[ATTR1]] @@ -98,7 +98,7 @@ define i32 @caller2(i1 %Q) { ; TUNIT-NEXT: [[R:%.*]] = add i32 [[N]], [[M]] ; TUNIT-NEXT: ret i32 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller2 ; CGSCC-SAME: (i1 [[Q:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[X:%.*]] = call [[TMP0:%.*]] @foo(i1 [[Q]]) #[[ATTR2]] @@ -125,10 +125,10 @@ define i32 @caller2(i1 %Q) { ret i32 %R } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll b/llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll index 343c4c8f7a737..6921904488dd7 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/solve-after-each-resolving-undefs-for-function.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define internal i32 @testf(i1 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@testf ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -29,7 +29,7 @@ if.end: ; preds = %if.then1, %entry } define internal i32 @test1(i1 %c) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -59,12 +59,12 @@ ret2: ; preds = %if.then, %entry } define i32 @main(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@main ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: ret i32 99 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@main ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[RES:%.*]] = call noundef i32 @test1(i1 [[C]]) #[[ATTR2]] @@ -74,9 +74,9 @@ define i32 @main(i1 %c) { ret i32 %res } 
;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll index 6c10fe2d77228..f7d145d49ae5e 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll @@ -26,7 +26,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @[[GSH:[a-zA-Z0-9_$"\\.-]+]] = dso_local global i32 0, align 4 ;. define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CHECK-LABEL: define {{[^@]+}}@callee ; CHECK-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -63,7 +63,7 @@ declare !callback !0 dso_local void @broker(i32*, i32 (i32*, i32*)*, i32*) !1 = !{i64 1, i64 0, i64 2, i1 false} !0 = !{!1} ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readonly willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(read) } ;. 
; CHECK: [[META0:![0-9]+]] = !{!1} ; CHECK: [[META1:![0-9]+]] = !{i64 1, i64 0, i64 2, i1 false} diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll index 3c86c78e4f3d4..5ffa951ab4766 100644 --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -17,7 +17,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i8 0, align 32 ;. define i32* @test1(i32* align 8 %0) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@test1 ; CHECK-SAME: (i32* nofree readnone returned align 8 "no-capture-maybe-returned" [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32* [[TMP0]] @@ -27,7 +27,7 @@ define i32* @test1(i32* align 8 %0) #0 { ; TEST 2 define i32* @test2(i32* %0) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i32* [[TMP0]] @@ -37,7 +37,7 @@ define i32* @test2(i32* %0) #0 { ; TEST 3 define i32* @test3(i32* align 8 %0, i32* align 4 %1, i1 %2) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@test3 ; CHECK-SAME: (i32* nofree readnone align 8 "no-capture-maybe-returned" [[TMP0:%.*]], i32* nofree readnone align 4 "no-capture-maybe-returned" [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = select i1 [[TMP2]], i32* [[TMP0]], i32* [[TMP1]] @@ -49,7 +49,7 
@@ define i32* @test3(i32* align 8 %0, i32* align 4 %1, i1 %2) #0 { ; TEST 4 define i32* @test4(i32* align 32 %0, i32* align 32 %1, i1 %2) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@test4 ; CHECK-SAME: (i32* nofree readnone align 32 "no-capture-maybe-returned" [[TMP0:%.*]], i32* nofree readnone align 32 "no-capture-maybe-returned" [[TMP1:%.*]], i1 [[TMP2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = select i1 [[TMP2]], i32* [[TMP0]], i32* [[TMP1]] @@ -85,12 +85,12 @@ define i32* @test5_2() { ; TEST 6 ; SCC define i32* @test6_1() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test6_1 ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test6_1 ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32* undef @@ -100,12 +100,12 @@ define i32* @test6_1() #0 { } define i32* @test6_2() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test6_2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test6_2 ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32* undef @@ -134,7 +134,7 @@ define i32* 
@test6_2() #0 { ; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f1(i8* readnone %0) local_unnamed_addr #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@f1 ; CHECK-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: br label [[TMP3:%.*]] @@ -192,7 +192,7 @@ define internal i8* @f2(i8* readnone %0) local_unnamed_addr #0 { ; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f3 ; CGSCC-SAME: () local_unnamed_addr #[[ATTR0]] { ; CGSCC-NEXT: br label [[TMP2:%.*]] @@ -216,13 +216,13 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { ; TEST 7 ; Better than IR information define align 4 i8* @test7() #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test7 ; TUNIT-SAME: () #[[ATTR0]] { -; TUNIT-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) #[[ATTR9:[0-9]+]] +; TUNIT-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) #[[ATTR11:[0-9]+]] ; TUNIT-NEXT: ret i8* [[C]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind 
willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test7 ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[C:%.*]] = tail call noundef nonnull align 8 dereferenceable(1) i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) @a1) #[[ATTR13:[0-9]+]] @@ -235,7 +235,7 @@ define align 4 i8* @test7() #0 { ; TEST 7b ; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 { -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f1b ; CGSCC-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CGSCC-NEXT: br label [[TMP3:%.*]] @@ -296,7 +296,7 @@ define internal i8* @f2b(i8* readnone %0) local_unnamed_addr #0 { ; Function Attrs: nounwind readnone ssp uwtable define internal i8* @f3b(i8* readnone %0) local_unnamed_addr #0 { ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f3b ; CGSCC-SAME: () local_unnamed_addr #[[ATTR0]] { ; CGSCC-NEXT: br label [[TMP2:%.*]] @@ -318,12 +318,12 @@ define internal i8* @f3b(i8* readnone %0) local_unnamed_addr #0 { } define align 4 i32* @test7b(i32* align 32 %p) #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test7b ; TUNIT-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: ret i32* [[P]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn 
uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test7b ; CGSCC-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: ret i32* [[P]] @@ -525,14 +525,14 @@ e: define i64 @test11(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test11 ; TUNIT-SAME: (i32* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[P:%.*]]) #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* ; TUNIT-NEXT: [[RET:%.*]] = load i64, i64* [[P_CAST]], align 8 ; TUNIT-NEXT: ret i64 [[RET]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test11 ; CGSCC-SAME: (i32* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[P:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -549,7 +549,7 @@ define i64 @test11(i32* %p) { ; FXIME: %p should have nonnull define i64 @test12-1(i32* align 4 %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test12-1 ; TUNIT-SAME: (i32* nocapture nofree readonly align 16 [[P:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -558,7 +558,7 @@ define i64 @test12-1(i32* align 4 %p) { ; TUNIT-NEXT: [[RET:%.*]] = load i64, i64* [[ARRAYIDX1]], align 16 ; TUNIT-NEXT: ret i64 [[RET]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test12-1 ; CGSCC-SAME: (i32* nocapture nofree readonly align 16 [[P:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -575,7 +575,7 @@ define i64 @test12-1(i32* align 4 %p) { } define i64 @test12-2(i32* align 4 %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test12-2 ; TUNIT-SAME: (i32* nocapture nofree nonnull readonly align 16 dereferenceable(8) [[P:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -583,7 +583,7 @@ define i64 @test12-2(i32* align 4 %p) { ; TUNIT-NEXT: [[RET:%.*]] = load i64, i64* [[ARRAYIDX0]], align 16 ; TUNIT-NEXT: ret i64 [[RET]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test12-2 ; CGSCC-SAME: (i32* nocapture nofree nonnull readonly align 16 dereferenceable(8) [[P:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -599,7 +599,7 @@ define i64 @test12-2(i32* align 4 %p) { ; FXIME: %p should have nonnull define void @test12-3(i32* align 4 %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test12-3 ; TUNIT-SAME: (i32* nocapture nofree writeonly align 16 [[P:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -608,7 +608,7 @@ define void @test12-3(i32* align 4 %p) { ; TUNIT-NEXT: store i64 0, i64* [[ARRAYIDX1]], align 16 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function 
Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@test12-3 ; CGSCC-SAME: (i32* nocapture nofree writeonly align 16 [[P:%.*]]) #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -625,7 +625,7 @@ define void @test12-3(i32* align 4 %p) { } define void @test12-4(i32* align 4 %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test12-4 ; TUNIT-SAME: (i32* nocapture nofree nonnull writeonly align 16 dereferenceable(8) [[P:%.*]]) #[[ATTR5]] { ; TUNIT-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -633,7 +633,7 @@ define void @test12-4(i32* align 4 %p) { ; TUNIT-NEXT: store i64 0, i64* [[ARRAYIDX0]], align 16 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@test12-4 ; CGSCC-SAME: (i32* nocapture nofree nonnull writeonly align 16 dereferenceable(8) [[P:%.*]]) #[[ATTR6]] { ; CGSCC-NEXT: [[P_CAST:%.*]] = bitcast i32* [[P]] to i64* @@ -699,7 +699,7 @@ define void @test12-6(i32* align 4 %p) { } define void @test13(i1 %c, i32* align 32 %dst) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test13 ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -712,7 +712,7 @@ define void @test13(i1 %c, i32* align 32 %dst) #0 { ; TUNIT-NEXT: store i32 0, i32* [[PTR]], align 32 ; TUNIT-NEXT: ret void ; -; CGSCC: Function 
Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test13 ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -737,7 +737,7 @@ end: } define void @test13-1(i1 %c, i32* align 32 %dst) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test13-1 ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -750,7 +750,7 @@ define void @test13-1(i1 %c, i32* align 32 %dst) { ; TUNIT-NEXT: store i32 0, i32* [[PTR]], align 16 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test13-1 ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -775,7 +775,7 @@ end: } define void @test13-2(i1 %c, i32* align 32 %dst) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test13-2 ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -788,7 +788,7 @@ define void @test13-2(i1 %c, i32* align 32 %dst) { ; TUNIT-NEXT: store i32 0, i32* [[PTR]], align 32 ; TUNIT-NEXT: ret void ; -; CGSCC: 
Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test13-2 ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -813,7 +813,7 @@ end: } define void @test13-3(i1 %c, i32* align 32 %dst) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test13-3 ; TUNIT-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -826,7 +826,7 @@ define void @test13-3(i1 %c, i32* align 32 %dst) { ; TUNIT-NEXT: store i32 0, i32* [[PTR]], align 32 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test13-3 ; CGSCC-SAME: (i1 [[C:%.*]], i32* nocapture nofree writeonly align 32 [[DST:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: br i1 [[C]], label [[TRUEBB:%.*]], label [[FALSEBB:%.*]] @@ -852,13 +852,13 @@ end: ; Don't crash on ptr2int/int2ptr uses. 
define i64 @ptr2int(i32* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ptr2int -; TUNIT-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR9]] { +; TUNIT-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[P2I:%.*]] = ptrtoint i32* [[P]] to i64 ; TUNIT-NEXT: ret i64 [[P2I]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ptr2int ; CGSCC-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[P2I:%.*]] = ptrtoint i32* [[P]] to i64 @@ -868,13 +868,13 @@ define i64 @ptr2int(i32* %p) { ret i64 %p2i } define i64* @int2ptr(i64 %i) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@int2ptr ; TUNIT-SAME: (i64 [[I:%.*]]) #[[ATTR9]] { ; TUNIT-NEXT: [[I2P:%.*]] = inttoptr i64 [[I]] to i64* ; TUNIT-NEXT: ret i64* [[I2P]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@int2ptr ; CGSCC-SAME: (i64 [[I:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[I2P:%.*]] = inttoptr i64 [[I]] to i64* @@ -886,13 +886,13 @@ define i64* @int2ptr(i64 %i) { ; Use the store alignment only for the pointer operand. 
define void @aligned_store(i8* %Value, i8** %Ptr) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@aligned_store ; TUNIT-SAME: (i8* nofree writeonly [[VALUE:%.*]], i8** nocapture nofree noundef nonnull writeonly align 32 dereferenceable(8) [[PTR:%.*]]) #[[ATTR5]] { ; TUNIT-NEXT: store i8* [[VALUE]], i8** [[PTR]], align 32 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@aligned_store ; CGSCC-SAME: (i8* nofree writeonly [[VALUE:%.*]], i8** nocapture nofree noundef nonnull writeonly align 32 dereferenceable(8) [[PTR:%.*]]) #[[ATTR6]] { ; CGSCC-NEXT: store i8* [[VALUE]], i8** [[PTR]], align 32 @@ -916,14 +916,14 @@ define void @align_call_op_not_store(i8* align 2048 %arg) { } define void @align_store_after_bc(i32* align 2048 %arg) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@align_store_after_bc ; TUNIT-SAME: (i32* nocapture nofree nonnull writeonly align 2048 dereferenceable(1) [[ARG:%.*]]) #[[ATTR5]] { ; TUNIT-NEXT: [[BC:%.*]] = bitcast i32* [[ARG]] to i8* ; TUNIT-NEXT: store i8 0, i8* [[BC]], align 2048 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@align_store_after_bc ; CGSCC-SAME: (i32* nocapture nofree nonnull writeonly align 2048 dereferenceable(1) [[ARG:%.*]]) #[[ATTR6]] { ; CGSCC-NEXT: [[BC:%.*]] = bitcast i32* [[ARG]] to i8* @@ -939,13 +939,13 
@@ define void @align_store_after_bc(i32* align 2048 %arg) { ; we cannot also put on the caller. @cnd = external global i1 define i32 @musttail_callee_1(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@musttail_callee_1 ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 32 ; TUNIT-NEXT: ret i32 [[V]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@musttail_callee_1 ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: [[V:%.*]] = load i32, i32* [[P]], align 32 @@ -955,24 +955,24 @@ define i32 @musttail_callee_1(i32* %p) { ret i32 %v } define i32 @musttail_caller_1(i32* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@musttail_caller_1 ; TUNIT-SAME: (i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR10:[0-9]+]] { ; TUNIT-NEXT: [[C:%.*]] = load i1, i1* @cnd, align 1 ; TUNIT-NEXT: br i1 [[C]], label [[MT:%.*]], label [[EXIT:%.*]] ; TUNIT: mt: -; TUNIT-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree readonly [[P]]) #[[ATTR11:[0-9]+]] +; TUNIT-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree readonly [[P]]) #[[ATTR12:[0-9]+]] ; TUNIT-NEXT: ret i32 [[V]] ; TUNIT: exit: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define 
{{[^@]+}}@musttail_caller_1 ; CGSCC-SAME: (i32* nocapture nofree readonly [[P:%.*]]) #[[ATTR11:[0-9]+]] { ; CGSCC-NEXT: [[C:%.*]] = load i1, i1* @cnd, align 1 ; CGSCC-NEXT: br i1 [[C]], label [[MT:%.*]], label [[EXIT:%.*]] ; CGSCC: mt: -; CGSCC-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: ret i32 [[V]] ; CGSCC: exit: ; CGSCC-NEXT: ret i32 0 @@ -1051,7 +1051,7 @@ declare void @align4_callee(i8* align(4) %p) @G = global i8 0, align 32 define internal i8* @aligned_8_return(i8* %a, i1 %c1, i1 %c2) norecurse { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@aligned_8_return ; TUNIT-SAME: (i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR9]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i8*, align 8 @@ -1068,7 +1068,7 @@ define internal i8* @aligned_8_return(i8* %a, i1 %c1, i1 %c2) norecurse { ; TUNIT-NEXT: [[L:%.*]] = load i8*, i8** [[STACK]], align 8 ; TUNIT-NEXT: ret i8* [[L]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@aligned_8_return ; CGSCC-SAME: (i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i8*, align 8 @@ -1101,13 +1101,13 @@ end: } define i8* @aligned_8_return_caller(i8* align(16) %a, i1 %c1, i1 %c2) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@aligned_8_return_caller ; TUNIT-SAME: (i8* nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR9]] { -; TUNIT-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR12]] ; TUNIT-NEXT: ret i8* [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@aligned_8_return_caller ; CGSCC-SAME: (i8* nofree readnone align 16 [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR12:[0-9]+]] { ; CGSCC-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR13]] @@ -1121,33 +1121,32 @@ attributes #0 = { nounwind uwtable noinline } attributes #1 = { uwtable noinline } attributes #2 = { null_pointer_is_valid } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone willreturn uwtable } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } ; TUNIT: attributes #[[ATTR2]] = { nounwind } ; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind } -; TUNIT: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } ; TUNIT: attributes #[[ATTR6]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; TUNIT: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR7]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; TUNIT: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR11]] = { 
nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } ; CGSCC: attributes #[[ATTR1]] = { noinline nounwind uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree noinline nosync nounwind readnone willreturn uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } ; CGSCC: attributes #[[ATTR3]] = { nounwind } ; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind } -; CGSCC: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR6]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } ; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR8]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR11]] = { nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR13]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR14]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR8]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: 
attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR13]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/allow_list.ll b/llvm/test/Transforms/Attributor/allow_list.ll index 65650d174e3c1..31cb6bbc66281 100644 --- a/llvm/test/Transforms/Attributor/allow_list.ll +++ b/llvm/test/Transforms/Attributor/allow_list.ll @@ -35,7 +35,7 @@ define internal i32 @range_test(i32 %a) #0 { ; CHECK_DISABLED_FUNCTION-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 ; CHECK_DISABLED_FUNCTION-NEXT: ret i32 [[TMP2]] ; -; CHECK_ENABLED_FUNCTION: Function Attrs: noinline nounwind readnone uwtable +; CHECK_ENABLED_FUNCTION: Function Attrs: noinline nounwind memory(none) uwtable ; CHECK_ENABLED_FUNCTION-LABEL: define {{[^@]+}}@range_test ; CHECK_ENABLED_FUNCTION-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK_ENABLED_FUNCTION-NEXT: ret i32 1 @@ -65,7 +65,7 @@ define i32 @range_use1() #0 { ; CHECK_DISABLED_FUNCTION-NEXT: [[TMP1:%.*]] = call i32 @range_test(i32 123) ; CHECK_DISABLED_FUNCTION-NEXT: ret i32 [[TMP1]] ; -; CHECK_ENABLED_FUNCTION: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK_ENABLED_FUNCTION: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK_ENABLED_FUNCTION-LABEL: define {{[^@]+}}@range_use1 ; CHECK_ENABLED_FUNCTION-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK_ENABLED_FUNCTION-NEXT: ret i32 1 @@ -112,7 +112,7 @@ attributes #0 = { nounwind uwtable noinline } ;. ; CHECK_DISABLED_FUNCTION: attributes #[[ATTR0]] = { noinline nounwind uwtable } ;. 
-; CHECK_ENABLED_FUNCTION: attributes #[[ATTR0]] = { noinline nounwind readnone uwtable } -; CHECK_ENABLED_FUNCTION: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } +; CHECK_ENABLED_FUNCTION: attributes #[[ATTR0]] = { noinline nounwind memory(none) uwtable } +; CHECK_ENABLED_FUNCTION: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } ; CHECK_ENABLED_FUNCTION: attributes #[[ATTR2]] = { noinline nounwind uwtable } ;. diff --git a/llvm/test/Transforms/Attributor/alwaysinline.ll b/llvm/test/Transforms/Attributor/alwaysinline.ll index 72847475b68d2..e1602a38a9f6f 100644 --- a/llvm/test/Transforms/Attributor/alwaysinline.ll +++ b/llvm/test/Transforms/Attributor/alwaysinline.ll @@ -8,7 +8,7 @@ ; the function is not exactly defined, and marked alwaysinline and can be inlined, ; so the function can be analyzed define linkonce void @inner1() alwaysinline { -; CHECK: Function Attrs: alwaysinline nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: alwaysinline nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@inner1 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -19,13 +19,13 @@ entry: } define void @outer1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@outer1 ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@outer1 ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -122,12 +122,12 @@ define i32 @outer3(i32 %x) { ret i32 %call } ;. 
-; TUNIT: attributes #[[ATTR0]] = { alwaysinline nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { alwaysinline nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR2]] = { norecurse } ; TUNIT: attributes #[[ATTR3]] = { alwaysinline } ;. -; CGSCC: attributes #[[ATTR0]] = { alwaysinline nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { alwaysinline nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { alwaysinline } ;. diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll index 30a8283475178..2119961d37a12 100644 --- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s 
--check-prefixes=CGSCC ; define internal i8 @read_arg(i8* %p) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@read_arg ; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -17,14 +17,14 @@ entry: } define internal i8 @read_arg_index(i8* %p, i64 %index) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@read_arg_index ; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 2 ; TUNIT-NEXT: ret i8 [[L]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@read_arg_index ; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -38,7 +38,7 @@ entry: } define i8 @call_simplifiable_1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1 ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -46,14 +46,14 @@ define i8 @call_simplifiable_1() { ; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; TUNIT-NEXT: ret i8 2 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define 
{{[^@]+}}@call_simplifiable_1 ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2:[0-9]+]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: @@ -64,70 +64,8 @@ entry: ret i8 %r } -;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph. - -define internal i8 @read_arg_1(i8* %p) { -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn -; CGSCC-LABEL: define {{[^@]+}}@read_arg_1 -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 -; CGSCC-NEXT: ret i8 [[L]] -; -entry: - %l = load i8, i8* %p, align 1 - ret i8 %l -} - -define internal i8 @sum_two_same_loads(i8* %p) { -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn -; CGSCC-LABEL: define {{[^@]+}}@sum_two_same_loads -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]]) #[[ATTR2:[0-9]+]] { -; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR3]] -; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_1(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR3]] -; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] -; CGSCC-NEXT: ret i8 [[Z]] -; - %x = call i8 @read_arg_1(i8* %p) - %y = call i8 @read_arg_1(i8* %p) - %z = add nsw i8 %x, %y - ret i8 %z -} - -define i8 @call_simplifiable_2() { -; TUNIT: 
Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2 -; TUNIT-SAME: () #[[ATTR1]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 -; TUNIT-NEXT: ret i8 4 -; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn -; CGSCC-LABEL: define {{[^@]+}}@call_simplifiable_2 -; CGSCC-SAME: () #[[ATTR1]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 -; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 -; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_same_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]] -; CGSCC-NEXT: ret i8 [[R]] -; -entry: - %Bytes = alloca [1024 x i8], align 16 - %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 - store i8 2, i8* %i0 - %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 - store i8 3, i8* %i1 - %r = call i8 @sum_two_same_loads(i8* %i0) - ret i8 %r -} - define i8 @call_not_simplifiable_1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_1 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -137,14 +75,14 @@ define i8 @call_not_simplifiable_1() { ; TUNIT-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) 
#[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret i8 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_1 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 ; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 ; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR3]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @read_arg_index(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]]) #[[ATTR2]] ; CGSCC-NEXT: ret i8 [[R]] ; entry: @@ -155,96 +93,12 @@ entry: ret i8 %r } -;;; Same as read_arg, but we need a copy to form distinct leaves in the callgraph. - -define internal i8 @read_arg_2(i8* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn -; TUNIT-LABEL: define {{[^@]+}}@read_arg_2 -; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[P:%.*]]) #[[ATTR0]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 -; TUNIT-NEXT: ret i8 [[L]] -; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn -; CGSCC-LABEL: define {{[^@]+}}@read_arg_2 -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 -; CGSCC-NEXT: ret i8 [[L]] -; -entry: - %l = load i8, i8* %p, align 1 - ret i8 %l -} - -define internal i8 @sum_two_different_loads(i8* %p, i8* %q) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn -; TUNIT-LABEL: define {{[^@]+}}@sum_two_different_loads -; TUNIT-SAME: (i8* nocapture nofree 
noundef nonnull readonly align 2 dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR0]] { -; TUNIT-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[P]]) #[[ATTR2]] -; TUNIT-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR2]] -; TUNIT-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] -; TUNIT-NEXT: ret i8 [[Z]] -; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn -; CGSCC-LABEL: define {{[^@]+}}@sum_two_different_loads -; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P:%.*]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q:%.*]]) #[[ATTR2]] { -; CGSCC-NEXT: [[X:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1022) [[P]]) #[[ATTR3]] -; CGSCC-NEXT: [[Y:%.*]] = call i8 @read_arg_2(i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[Q]]) #[[ATTR3]] -; CGSCC-NEXT: [[Z:%.*]] = add nsw i8 [[X]], [[Y]] -; CGSCC-NEXT: ret i8 [[Z]] -; - %x = call i8 @read_arg_2(i8* %p) - %y = call i8 @read_arg_2(i8* %q) - %z = add nsw i8 %x, %y - ret i8 %z -} - -define i8 @call_not_simplifiable_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; TUNIT-LABEL: define {{[^@]+}}@call_not_simplifiable_2 -; TUNIT-SAME: () #[[ATTR1]] { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; TUNIT-NEXT: store i8 2, i8* [[I0]], align 2 -; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 -; TUNIT-NEXT: store i8 3, i8* [[I1]], align 1 -; TUNIT-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; 
TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR2]] -; TUNIT-NEXT: ret i8 [[R]] -; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn -; CGSCC-LABEL: define {{[^@]+}}@call_not_simplifiable_2 -; CGSCC-SAME: () #[[ATTR1]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16 -; CGSCC-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 2 -; CGSCC-NEXT: store i8 2, i8* [[I0]], align 2 -; CGSCC-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 3 -; CGSCC-NEXT: store i8 3, i8* [[I1]], align 1 -; CGSCC-NEXT: [[BASE:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0 -; CGSCC-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(i8* nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I0]], i8* nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I1]]) #[[ATTR3]] -; CGSCC-NEXT: ret i8 [[R]] -; -entry: - %Bytes = alloca [1024 x i8], align 16 - %i0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 2 - store i8 2, i8* %i0 - %i1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 3 - store i8 3, i8* %i1 - %base = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0 - %r = call i8 @sum_two_different_loads(i8* %i0, i8* %i1) - ret i8 %r -} - ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR3]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll b/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll index 16c57b1a1e8c6..d74fef2a0b373 100644 --- a/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll +++ b/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll @@ -192,10 +192,10 @@ attributes #0 = { noinline nounwind sspstrong uwtable} ; TUNIT_: !0 = !{i32 0, i32 101} ; TUNIT_: !1 = !{i32 100, i32 201} ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong willreturn uwtable } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone sspstrong willreturn uwtable } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind sspstrong willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll b/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll index 3f26a19cb0825..54c61aace5260 100644 --- a/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll +++ b/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll @@ -195,10 +195,10 @@ attributes #0 = { noinline nounwind sspstrong uwtable} ; TUNIT_: !0 = !{i32 0, i32 101} ; TUNIT_: !1 = !{i32 100, i32 201} ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong willreturn uwtable } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone sspstrong willreturn uwtable } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind sspstrong willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/cb_range_disabled.ll b/llvm/test/Transforms/Attributor/cb_range_disabled.ll index e3771fd70104f..9463b3d840138 100644 --- a/llvm/test/Transforms/Attributor/cb_range_disabled.ll +++ b/llvm/test/Transforms/Attributor/cb_range_disabled.ll @@ -141,10 +141,10 @@ define i32 @test2_ncheck(i32 %unknown) { ret i32 %3 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/cb_range_enabled.ll b/llvm/test/Transforms/Attributor/cb_range_enabled.ll index 674e45e0f1aaa..42663300eacf6 100644 --- a/llvm/test/Transforms/Attributor/cb_range_enabled.ll +++ b/llvm/test/Transforms/Attributor/cb_range_enabled.ll @@ -145,10 +145,10 @@ define i32 @test2_ncheck(i32 %unknown) { ret i32 %3 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll index 48d11a76e313e..81b13489703c2 100644 --- a/llvm/test/Transforms/Attributor/depgraph.ll +++ b/llvm/test/Transforms/Attributor/depgraph.ll @@ -14,7 +14,7 @@ ; } ; define i32* @checkAndAdvance(i32* align 16 %0) { -; CHECK: Function Attrs: argmemonly nofree nosync nounwind readonly +; CHECK: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@checkAndAdvance ; CHECK-SAME: (i32* nofree noundef nonnull readonly align 16 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 16 @@ -380,6 +380,6 @@ define i32* @checkAndAdvance(i32* align 16 %0) { ; DOT-DAG: Node[[Node44]] -> Node[[Node43]]; ; DOT-DAG: Node[[Node43]] -> Node[[Node44]]; ;. -; CHECK: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly } -; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readonly } +; CHECK: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: read) } +; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index fee64076d90f2..ec3a106d5fa19 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -12,7 +12,7 @@ declare void @deref_phi_user(i32* %a); ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i64 0 ;. 
define i32* @test1(i32* dereferenceable(4) %0, double* dereferenceable(8) %1, i1 zeroext %2) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test1 ; CHECK-SAME: (i32* nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0:%.*]], double* nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1:%.*]], i1 zeroext [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP1]] to i32* @@ -26,7 +26,7 @@ define i32* @test1(i32* dereferenceable(4) %0, double* dereferenceable(8) %1, i1 ; TEST 2 define i32* @test2(i32* dereferenceable_or_null(4) %0, double* dereferenceable(8) %1, i1 zeroext %2) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i32* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP0:%.*]], double* nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1:%.*]], i1 zeroext [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP1]] to i32* @@ -41,7 +41,7 @@ define i32* @test2(i32* dereferenceable_or_null(4) %0, double* dereferenceable(8 ; TEST 3 ; GEP inbounds define i32* @test3_1(i32* dereferenceable(8) %0) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test3_1 ; CHECK-SAME: (i32* nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 1 @@ 
-52,7 +52,7 @@ define i32* @test3_1(i32* dereferenceable(8) %0) local_unnamed_addr { } define i32* @test3_2(i32* dereferenceable_or_null(32) %0) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test3_2 ; CHECK-SAME: (i32* nofree readnone dereferenceable_or_null(32) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4 @@ -63,7 +63,7 @@ define i32* @test3_2(i32* dereferenceable_or_null(32) %0) local_unnamed_addr { } define i32* @test3_3(i32* dereferenceable(8) %0, i32* dereferenceable(16) %1, i1 %2) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test3_3 ; CHECK-SAME: (i32* nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0:%.*]], i32* nofree nonnull readnone dereferenceable(16) "no-capture-maybe-returned" [[TMP1:%.*]], i1 [[TMP2:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[RET1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 1 @@ -81,7 +81,7 @@ define i32* @test3_3(i32* dereferenceable(8) %0, i32* dereferenceable(16) %1, i1 ; Better than known in IR. 
define dereferenceable(4) i32* @test4(i32* dereferenceable(8) %0) local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test4 ; CHECK-SAME: (i32* nofree nonnull readnone returned dereferenceable(8) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: ret i32* [[TMP0]] @@ -284,7 +284,7 @@ define i32* @f7_3() { ; FIXME: This should have a return dereferenceable(8) but we need to make sure it will work in loops as well. define i32* @test_for_minus_index(i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test_for_minus_index ; CHECK-SAME: (i32* nofree nonnull writeonly align 4 "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32* [[P]], i32 -2 @@ -297,7 +297,7 @@ define i32* @test_for_minus_index(i32* %p) { } define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@deref_or_null_and_nonnull ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(100) [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: store i32 1, i32* [[TMP0]], align 4 @@ -316,7 +316,7 @@ define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) { ; FIXME: %ptr should be dereferenceable(31) define void @test8(i8* %ptr) #0 { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) 
; CHECK-LABEL: define {{[^@]+}}@test8 ; CHECK-SAME: (i8* nocapture nofree nonnull writeonly dereferenceable(21) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: br label [[TMP1:%.*]] @@ -351,7 +351,7 @@ define void @test8(i8* %ptr) #0 { ; 8.2 (negative case) define void @test8_neg(i32 %i, i8* %ptr) #0 { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test8_neg ; CHECK-SAME: (i32 [[I:%.*]], i8* nocapture nofree nonnull writeonly [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[I]] to i64 @@ -374,7 +374,7 @@ define void @test8_neg(i32 %i, i8* %ptr) #0 { ; NOTE: %p should not be dereferenceable define internal void @fill_range_not_inbounds(i32* %p, i64 %start){ -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@fill_range_not_inbounds ; CHECK-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i64 [[START:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -410,7 +410,7 @@ for.body: ; preds = %entry, %for.body ; FIXME: %p should be dereferenceable(40) define internal void @fill_range_inbounds(i32* %p, i64 %start){ -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@fill_range_inbounds ; CHECK-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i64 [[START:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -445,7 +445,7 @@ for.body: ; preds = %entry, %for.body } define void @call_fill_range(i32* nocapture %p, i64* nocapture readonly %range) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn 
memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@call_fill_range ; TUNIT-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i64* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[RANGE:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -454,13 +454,13 @@ define void @call_fill_range(i32* nocapture %p, i64* nocapture readonly %range) ; TUNIT-NEXT: tail call void @fill_range_not_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR6]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@call_fill_range ; CGSCC-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i64* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[RANGE:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[TMP0:%.*]] = load i64, i64* [[RANGE]], align 8, !range [[RNG0:![0-9]+]] -; CGSCC-NEXT: tail call void @fill_range_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR6:[0-9]+]] -; CGSCC-NEXT: tail call void @fill_range_not_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR6]] +; CGSCC-NEXT: tail call void @fill_range_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR1]] +; CGSCC-NEXT: tail call void @fill_range_not_inbounds(i32* nocapture nofree writeonly [[P]], i64 [[TMP0]]) #[[ATTR1]] ; CGSCC-NEXT: ret void ; entry: @@ -563,7 +563,7 @@ cont2: ; ; FIXME: %ptr should be dereferenceable(4) define dso_local void @rec-branch-1(i32 %a, i32 %b, i32 %c, i32* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@rec-branch-1 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: 
entry: @@ -637,32 +637,59 @@ if.end8: ; preds = %if.then5, %if.else6 ; } ; FIXME: %ptr should be dereferenceable(4) define dso_local void @rec-branch-2(i32 %a, i32 %b, i32 %c, i32* %ptr) { -; CHECK: Function Attrs: argmemonly nofree nosync nounwind writeonly -; CHECK-LABEL: define {{[^@]+}}@rec-branch-2 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE3:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[B]], 0 -; CHECK-NEXT: br i1 [[TOBOOL1]], label [[IF_ELSE:%.*]], label [[IF_THEN2:%.*]] -; CHECK: if.then2: -; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[IF_END8:%.*]] -; CHECK: if.else: -; CHECK-NEXT: store i32 2, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[IF_END8]] -; CHECK: if.else3: -; CHECK-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: br i1 [[TOBOOL4]], label [[IF_ELSE6:%.*]], label [[IF_THEN5:%.*]] -; CHECK: if.then5: -; CHECK-NEXT: store i32 3, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[IF_END8]] -; CHECK: if.else6: -; CHECK-NEXT: tail call void @rec-branch-2(i32 noundef 1, i32 noundef 1, i32 noundef 1, i32* nocapture nofree writeonly [[PTR]]) #[[ATTR7:[0-9]+]] -; CHECK-NEXT: br label [[IF_END8]] -; CHECK: if.end8: -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) +; TUNIT-LABEL: define {{[^@]+}}@rec-branch-2 +; TUNIT-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 +; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE3:%.*]], label [[IF_THEN:%.*]] +; TUNIT: if.then: +; TUNIT-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[B]], 0 +; TUNIT-NEXT: br i1 [[TOBOOL1]], label [[IF_ELSE:%.*]], label 
[[IF_THEN2:%.*]] +; TUNIT: if.then2: +; TUNIT-NEXT: store i32 1, i32* [[PTR]], align 4 +; TUNIT-NEXT: br label [[IF_END8:%.*]] +; TUNIT: if.else: +; TUNIT-NEXT: store i32 2, i32* [[PTR]], align 4 +; TUNIT-NEXT: br label [[IF_END8]] +; TUNIT: if.else3: +; TUNIT-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 0 +; TUNIT-NEXT: br i1 [[TOBOOL4]], label [[IF_ELSE6:%.*]], label [[IF_THEN5:%.*]] +; TUNIT: if.then5: +; TUNIT-NEXT: store i32 3, i32* [[PTR]], align 4 +; TUNIT-NEXT: br label [[IF_END8]] +; TUNIT: if.else6: +; TUNIT-NEXT: tail call void @rec-branch-2(i32 noundef 1, i32 noundef 1, i32 noundef 1, i32* nocapture nofree writeonly [[PTR]]) #[[ATTR7:[0-9]+]] +; TUNIT-NEXT: br label [[IF_END8]] +; TUNIT: if.end8: +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) +; CGSCC-LABEL: define {{[^@]+}}@rec-branch-2 +; CGSCC-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i32* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A]], 0 +; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE3:%.*]], label [[IF_THEN:%.*]] +; CGSCC: if.then: +; CGSCC-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[B]], 0 +; CGSCC-NEXT: br i1 [[TOBOOL1]], label [[IF_ELSE:%.*]], label [[IF_THEN2:%.*]] +; CGSCC: if.then2: +; CGSCC-NEXT: store i32 1, i32* [[PTR]], align 4 +; CGSCC-NEXT: br label [[IF_END8:%.*]] +; CGSCC: if.else: +; CGSCC-NEXT: store i32 2, i32* [[PTR]], align 4 +; CGSCC-NEXT: br label [[IF_END8]] +; CGSCC: if.else3: +; CGSCC-NEXT: [[TOBOOL4:%.*]] = icmp eq i32 [[C]], 0 +; CGSCC-NEXT: br i1 [[TOBOOL4]], label [[IF_ELSE6:%.*]], label [[IF_THEN5:%.*]] +; CGSCC: if.then5: +; CGSCC-NEXT: store i32 3, i32* [[PTR]], align 4 +; CGSCC-NEXT: br label [[IF_END8]] +; CGSCC: if.else6: +; CGSCC-NEXT: tail call void @rec-branch-2(i32 noundef 1, i32 noundef 1, i32 noundef 1, i32* nocapture nofree writeonly [[PTR]]) #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: br label [[IF_END8]] +; CGSCC: if.end8: +; 
CGSCC-NEXT: ret void ; entry: %tobool = icmp eq i32 %a, 0 @@ -704,11 +731,17 @@ define void @nonnull_assume_pos(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ATTRIBUTOR-NEXT: call void @unknown() ; ATTRIBUTOR-NEXT: ret void ; -; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_pos -; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) [[ARG4:%.*]]) { -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR8:[0-9]+]] [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void +; TUNIT-LABEL: define {{[^@]+}}@nonnull_assume_pos +; TUNIT-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) [[ARG4:%.*]]) { +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR8:[0-9]+]] [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; TUNIT-NEXT: call void @unknown() +; TUNIT-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@nonnull_assume_pos +; CGSCC-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) 
[[ARG4:%.*]]) { +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR7:[0-9]+]] [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; CGSCC-NEXT: call void @unknown() +; CGSCC-NEXT: ret void ; call void @llvm.assume(i1 true) [ "nonnull"(i8* %arg3), "dereferenceable"(i8* %arg1, i64 1), "dereferenceable"(i8* %arg1, i64 2), "dereferenceable"(i8* %arg1, i64 101), "dereferenceable_or_null"(i8* %arg2, i64 31), "dereferenceable_or_null"(i8* %arg4, i64 42)] call void @unknown() @@ -750,23 +783,41 @@ define void @nonnull_assume_call(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ATTRIBUTOR-NEXT: call void @unknown() ; ATTRIBUTOR-NEXT: ret void ; -; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_call -; CHECK-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) { -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() #[[ATTR9:[0-9]+]] -; CHECK-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR9]] -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR9]] -; CHECK-NEXT: call void 
@unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR9]] -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void +; TUNIT-LABEL: define {{[^@]+}}@nonnull_assume_call +; TUNIT-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) { +; TUNIT-NEXT: call void @unknown() +; TUNIT-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() #[[ATTR9:[0-9]+]] +; TUNIT-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR9]] +; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; TUNIT-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR9]] +; TUNIT-NEXT: call void @unknown() +; TUNIT-NEXT: ret void +; +; CGSCC-LABEL: define {{[^@]+}}@nonnull_assume_call +; CGSCC-SAME: (i8* [[ARG1:%.*]], i8* 
[[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) { +; CGSCC-NEXT: call void @unknown() +; CGSCC-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() #[[ATTR8:[0-9]+]] +; CGSCC-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR8]] +; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; CGSCC-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(31) [[ARG2]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use8(i8* dereferenceable_or_null(42) [[ARG4]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) #[[ATTR8]] +; CGSCC-NEXT: call void @unknown() +; CGSCC-NEXT: ret void ; call void @unknown() %p = call i32* @unkown_ptr() @@ -818,27 +869,26 @@ f: !0 = !{i64 10, i64 100} ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR1]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind writeonly } -; TUNIT: attributes #[[ATTR5:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind writeonly } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind memory(argmem: write) } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR8]] = { willreturn } ; TUNIT: attributes #[[ATTR9]] = { nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR1]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR3]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR5:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR7]] = { nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR8]] = { willreturn } -; CGSCC: attributes #[[ATTR9]] = { nounwind } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind memory(argmem: write) } +; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR7]] = { willreturn } +; CGSCC: attributes #[[ATTR8]] = { nounwind } ;. ; CHECK: [[META0:![0-9]+]] = !{i64 10, i64 100} ;. 
diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll b/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll index 2f0b8986ccac5..68066817c54bf 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll @@ -6,7 +6,7 @@ ; https://bugs.llvm.org/show_bug.cgi?id=21780 define <4 x double> @PR21780(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780 ; CHECK-SAME: (double* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 1 @@ -44,7 +44,7 @@ define <4 x double> @PR21780(double* %ptr) { define double @PR21780_only_access3_with_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_only_access3_with_inbounds ; CHECK-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 3 @@ -58,7 +58,7 @@ define double @PR21780_only_access3_with_inbounds(double* %ptr) { } define double @PR21780_only_access3_without_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_only_access3_without_inbounds ; CHECK-SAME: (double* nocapture nofree readonly align 8 [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, 
double* [[PTR]], i64 3 @@ -71,7 +71,7 @@ define double @PR21780_only_access3_without_inbounds(double* %ptr) { } define double @PR21780_without_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_without_inbounds ; CHECK-SAME: (double* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3 @@ -94,7 +94,7 @@ define double @PR21780_without_inbounds(double* %ptr) { ; Unsimplified, but still valid. Also, throw in some bogus arguments. define void @gep0(i8* %unused, i8* %other, i8* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@gep0 ; CHECK-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[OTHER:%.*]], i8* nocapture nofree nonnull readonly dereferenceable(3) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, i8* [[PTR]], i64 2 @@ -116,7 +116,7 @@ define void @gep0(i8* %unused, i8* %other, i8* %ptr) { ; Multiple arguments may be dereferenceable. define void @ordering(i8* %ptr1, i32* %ptr2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ordering ; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR1:%.*]], i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: ret void @@ -137,7 +137,7 @@ define void @ordering(i8* %ptr1, i32* %ptr2) { ; Not in entry block. 
define void @not_entry_but_guaranteed_to_execute(i8* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_entry_but_guaranteed_to_execute ; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -160,7 +160,7 @@ exit: ; Not in entry block and not guaranteed to execute. define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_entry_not_guaranteed_to_execute ; CHECK-SAME: (i8* nocapture nofree readnone [[PTR:%.*]], i1 [[COND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -187,7 +187,7 @@ exit: ; The last load may not execute, so derefenceable bytes only covers the 1st two loads. define void @partial_in_entry(i16* %ptr, i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@partial_in_entry ; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(4) [[PTR:%.*]], i1 [[COND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -215,7 +215,7 @@ exit: ; The 2nd and 3rd loads may never execute. 
define void @volatile_is_not_dereferenceable(i16* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable ; CHECK-SAME: (i16* nofree align 2 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: [[T0:%.*]] = load volatile i16, i16* [[PTR]], align 2 @@ -233,7 +233,7 @@ define void @volatile_is_not_dereferenceable(i16* %ptr) { ; TODO: We should allow inference for atomic (but not volatile) ops. define void @atomic_is_alright(i16* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@atomic_is_alright ; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(6) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -268,7 +268,7 @@ define void @not_guaranteed_to_transfer_execution(i16* %ptr) { ; We must have consecutive accesses. 
define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@variable_gep_index ; CHECK-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]], i64 [[VARIABLE_INDEX:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -285,7 +285,7 @@ define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { define void @multi_index_gep(<4 x i8>* %ptr) { ; FIXME: %ptr should be dereferenceable(4) -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@multi_index_gep ; CHECK-SAME: (<4 x i8>* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -298,7 +298,7 @@ define void @multi_index_gep(<4 x i8>* %ptr) { ; Could round weird bitwidths down? define void @not_byte_multiple(i9* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_byte_multiple ; CHECK-SAME: (i9* nocapture nofree nonnull readnone align 2 dereferenceable(2) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -311,7 +311,7 @@ define void @not_byte_multiple(i9* %ptr) { ; Missing direct access from the pointer. 
define void @no_pointer_deref(i16* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@no_pointer_deref ; CHECK-SAME: (i16* nocapture nofree readnone align 2 [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -326,7 +326,7 @@ define void @no_pointer_deref(i16* %ptr) { ; Out-of-order is ok, but missing access concludes dereferenceable range. define void @non_consecutive(i32* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@non_consecutive ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -343,7 +343,7 @@ define void @non_consecutive(i32* %ptr) { ; Improve on existing dereferenceable attribute. define void @more_bytes(i32* dereferenceable(8) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@more_bytes ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -362,7 +362,7 @@ define void @more_bytes(i32* dereferenceable(8) %ptr) { ; Improve on existing dereferenceable_or_null attribute. 
define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@more_bytes_and_not_null ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -381,7 +381,7 @@ define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { ; But don't pessimize existing dereferenceable attribute. define void @better_bytes(i32* dereferenceable(100) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@better_bytes ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(100) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -398,7 +398,7 @@ define void @better_bytes(i32* dereferenceable(100) %ptr) { } define void @bitcast(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bitcast ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -412,7 +412,7 @@ define void @bitcast(i32* %arg) { } define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bitcast_different_sizes ; CHECK-SAME: (double* nocapture nofree nonnull readnone align 4 dereferenceable(12) [[ARG1:%.*]], i8* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[ARG2:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -434,7 +434,7 
@@ define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { } define void @negative_offset(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@negative_offset ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -448,7 +448,7 @@ define void @negative_offset(i32* %arg) { } define void @stores(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@stores ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float* @@ -467,7 +467,7 @@ define void @stores(i32* %arg) { } define void @load_store(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@load_store ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float* @@ -484,7 +484,7 @@ define void @load_store(i32* %arg) { } define void @different_size1(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@different_size1 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[ARG_CAST:%.*]] = bitcast i32* [[ARG]] to double* @@ -499,7 +499,7 @@ define void @different_size1(i32* 
%arg) { } define void @different_size2(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@different_size2 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: store i32 0, i32* [[ARG]], align 8 @@ -532,7 +532,7 @@ define void @different_size2(i32* %arg) { ; ; ATTRIBUTOR_CGSCC_NPM-LABEL: define i32 @require_cfg_analysis(i32 %c, i32* {{.*}} dereferenceable(4) %p) define i32 @require_cfg_analysis(i32 %c, i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@require_cfg_analysis ; CHECK-SAME: (i32 [[C:%.*]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C]], 0 @@ -584,9 +584,9 @@ end: ret i32 1 } ;. 
-; CHECK: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CHECK: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR3]] = { argmemonly nofree norecurse nounwind willreturn } -; CHECK: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } ;. diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2.ll b/llvm/test/Transforms/Attributor/dereferenceable-2.ll index 76ed8c9f7a6e2..9c8bcfc0aed56 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-2.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-2.ll @@ -6,7 +6,7 @@ ; https://bugs.llvm.org/show_bug.cgi?id=21780 define <4 x double> @PR21780(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780 ; CHECK-SAME: (double* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 1 @@ -44,7 +44,7 @@ define <4 x double> @PR21780(double* %ptr) { define double @PR21780_only_access3_with_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse 
nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_only_access3_with_inbounds ; CHECK-SAME: (double* nocapture nofree nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[PTR]], i64 3 @@ -58,7 +58,7 @@ define double @PR21780_only_access3_with_inbounds(double* %ptr) { } define double @PR21780_only_access3_without_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_only_access3_without_inbounds ; CHECK-SAME: (double* nocapture nofree readonly align 8 [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3 @@ -71,7 +71,7 @@ define double @PR21780_only_access3_without_inbounds(double* %ptr) { } define double @PR21780_without_inbounds(double* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@PR21780_without_inbounds ; CHECK-SAME: (double* nocapture nofree noundef nonnull readonly align 8 dereferenceable(32) [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr double, double* [[PTR]], i64 3 @@ -94,7 +94,7 @@ define double @PR21780_without_inbounds(double* %ptr) { ; Unsimplified, but still valid. Also, throw in some bogus arguments. 
define void @gep0(i8* %unused, i8* %other, i8* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@gep0 ; CHECK-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[OTHER:%.*]], i8* nocapture nofree nonnull readonly dereferenceable(3) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, i8* [[PTR]], i64 2 @@ -116,7 +116,7 @@ define void @gep0(i8* %unused, i8* %other, i8* %ptr) { ; Multiple arguments may be dereferenceable. define void @ordering(i8* %ptr1, i32* %ptr2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ordering ; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR1:%.*]], i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: ret void @@ -137,7 +137,7 @@ define void @ordering(i8* %ptr1, i32* %ptr2) { ; Not in entry block. define void @not_entry_but_guaranteed_to_execute(i8* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_entry_but_guaranteed_to_execute ; CHECK-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(3) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -160,7 +160,7 @@ exit: ; Not in entry block and not guaranteed to execute. 
define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_entry_not_guaranteed_to_execute ; CHECK-SAME: (i8* nocapture nofree readnone [[PTR:%.*]], i1 [[COND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -187,7 +187,7 @@ exit: ; The last load may not execute, so derefenceable bytes only covers the 1st two loads. define void @partial_in_entry(i16* %ptr, i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@partial_in_entry ; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(4) [[PTR:%.*]], i1 [[COND:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -215,7 +215,7 @@ exit: ; The 2nd and 3rd loads may never execute. define void @volatile_is_not_dereferenceable(i16* %ptr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable ; CHECK-SAME: (i16* nofree align 2 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: [[T0:%.*]] = load volatile i16, i16* [[PTR]], align 2 @@ -233,7 +233,7 @@ define void @volatile_is_not_dereferenceable(i16* %ptr) { ; TODO: We should allow inference for atomic (but not volatile) ops. 
define void @atomic_is_alright(i16* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@atomic_is_alright ; CHECK-SAME: (i16* nocapture nofree nonnull readnone align 2 dereferenceable(6) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -268,7 +268,7 @@ define void @not_guaranteed_to_transfer_execution(i16* %ptr) { ; We must have consecutive accesses. define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@variable_gep_index ; CHECK-SAME: (i8* nocapture nofree readnone [[UNUSED:%.*]], i8* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]], i64 [[VARIABLE_INDEX:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -285,7 +285,7 @@ define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) { define void @multi_index_gep(<4 x i8>* %ptr) { ; FIXME: %ptr should be dereferenceable(4) -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@multi_index_gep ; CHECK-SAME: (<4 x i8>* nocapture nofree nonnull readnone dereferenceable(1) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -298,7 +298,7 @@ define void @multi_index_gep(<4 x i8>* %ptr) { ; Could round weird bitwidths down? 
define void @not_byte_multiple(i9* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@not_byte_multiple ; CHECK-SAME: (i9* nocapture nofree nonnull readnone align 2 dereferenceable(2) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -311,7 +311,7 @@ define void @not_byte_multiple(i9* %ptr) { ; Missing direct access from the pointer. define void @no_pointer_deref(i16* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@no_pointer_deref ; CHECK-SAME: (i16* nocapture nofree readnone align 2 [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -326,7 +326,7 @@ define void @no_pointer_deref(i16* %ptr) { ; Out-of-order is ok, but missing access concludes dereferenceable range. define void @non_consecutive(i32* %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@non_consecutive ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -343,7 +343,7 @@ define void @non_consecutive(i32* %ptr) { ; Improve on existing dereferenceable attribute. 
define void @more_bytes(i32* dereferenceable(8) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@more_bytes ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -362,7 +362,7 @@ define void @more_bytes(i32* dereferenceable(8) %ptr) { ; Improve on existing dereferenceable_or_null attribute. define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@more_bytes_and_not_null ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -381,7 +381,7 @@ define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) { ; But don't pessimize existing dereferenceable attribute. 
define void @better_bytes(i32* dereferenceable(100) %ptr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@better_bytes ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(100) [[PTR:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -398,7 +398,7 @@ define void @better_bytes(i32* dereferenceable(100) %ptr) { } define void @bitcast(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bitcast ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -412,7 +412,7 @@ define void @bitcast(i32* %arg) { } define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bitcast_different_sizes ; CHECK-SAME: (double* nocapture nofree nonnull readnone align 4 dereferenceable(12) [[ARG1:%.*]], i8* nocapture nofree nonnull readnone align 4 dereferenceable(16) [[ARG2:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -434,7 +434,7 @@ define void @bitcast_different_sizes(double* %arg1, i8* %arg2) { } define void @negative_offset(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@negative_offset ; CHECK-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: ret void @@ -448,7 +448,7 @@ define void @negative_offset(i32* %arg) { } define void @stores(i32* %arg) { -; CHECK: Function Attrs: 
argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@stores ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float* @@ -467,7 +467,7 @@ define void @stores(i32* %arg) { } define void @load_store(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@load_store ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[PTR:%.*]] = bitcast i32* [[ARG]] to float* @@ -484,7 +484,7 @@ define void @load_store(i32* %arg) { } define void @different_size1(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@different_size1 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[ARG_CAST:%.*]] = bitcast i32* [[ARG]] to double* @@ -499,7 +499,7 @@ define void @different_size1(i32* %arg) { } define void @different_size2(i32* %arg) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@different_size2 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: store i32 0, i32* [[ARG]], align 8 @@ -532,7 +532,7 @@ define void @different_size2(i32* %arg) { ; ; ATTRIBUTOR_CGSCC_NPM-LABEL: define i32 
@require_cfg_analysis(i32 %c, i32* {{.*}} dereferenceable(4) %p) define i32 @require_cfg_analysis(i32 %c, i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@require_cfg_analysis ; CHECK-SAME: (i32 [[C:%.*]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C]], 0 @@ -584,9 +584,9 @@ end: ret i32 1 } ;. -; CHECK: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CHECK: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR3]] = { argmemonly nofree norecurse nounwind willreturn } -; CHECK: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } ;. 
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 270797ee9d574..d1240f1449944 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -656,9 +656,9 @@ define void @test16d(i8 %v, i8** %P) { ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nofree nounwind } ; CHECK: attributes #[[ATTR4]] = { noreturn } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { allockind("free") } -; CHECK: attributes #[[ATTR6:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR7:[0-9]+]] = { allockind("alloc,uninitialized,aligned") allocsize(1) } ; CHECK: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) } -; CHECK: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CHECK: attributes #[[ATTR10]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll index 04295912d589a..7f2e16d878bbd 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll @@ -662,7 +662,7 @@ not_entry: ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nosync willreturn } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind } ; CHECK: attributes #[[ATTR3]] = { noreturn } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR5]] = { nounwind } ; CHECK: attributes #[[ATTR6]] = { nosync nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll index f55c00ed9258f..b7cc29822560f 100644 --- a/llvm/test/Transforms/Attributor/internal-noalias.ll +++ b/llvm/test/Transforms/Attributor/internal-noalias.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@visible ; TUNIT-SAME: (i32* noalias nocapture nofree readonly [[A:%.*]], i32* noalias nocapture nofree readonly align 4 [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -12,12 +12,12 @@ define dso_local i32 @visible(i32* noalias %A, i32* noalias %B) #0 { ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; TUNIT-NEXT: ret i32 [[ADD]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind readonly willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@visible ; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5:[0-9]+]] -; CGSCC-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree noundef nonnull readonly align 4 
dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CGSCC-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CGSCC-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; CGSCC-NEXT: ret i32 [[ADD]] ; @@ -29,7 +29,7 @@ entry: } define private i32 @noalias_args(i32* %A, i32* %B) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@noalias_args ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -40,14 +40,14 @@ define private i32 @noalias_args(i32* %A, i32* %B) #0 { ; TUNIT-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CALL]] ; TUNIT-NEXT: ret i32 [[ADD2]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind readonly willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 ; CGSCC-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[B]], align 4 ; CGSCC-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] -; CGSCC-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5:[0-9]+]] ; CGSCC-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[CALL]] ; CGSCC-NEXT: ret i32 [[ADD2]] ; @@ -62,7 +62,7 @@ entry: define internal i32 @noalias_args_argmem(i32* %A, i32* %B) #1 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@noalias_args_argmem ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -71,7 +71,7 @@ define internal i32 @noalias_args_argmem(i32* %A, i32* %B) #1 { ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] ; TUNIT-NEXT: ret i32 [[ADD]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -88,7 +88,7 @@ entry: } define dso_local i32 @visible_local(i32* %A) #0 { -; TUNIT: Function 
Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@visible_local ; TUNIT-SAME: (i32* nocapture nofree readonly [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -99,13 +99,13 @@ define dso_local i32 @visible_local(i32* %A) #0 { ; TUNIT-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; TUNIT-NEXT: ret i32 [[ADD]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@visible_local ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 5, i32* [[B]], align 4 -; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CGSCC-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; CGSCC-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; CGSCC-NEXT: ret i32 [[ADD]] @@ -120,7 +120,7 @@ entry: } define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { -; CGSCC: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; CGSCC: Function Attrs: nofree noinline 
norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem_ro ; CGSCC-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[B_PRIV:%.*]] = alloca i32, align 4 @@ -139,13 +139,13 @@ define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { } define i32 @visible_local_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@visible_local_2 ; TUNIT-SAME: () #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: ret i32 10 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@visible_local_2 ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32 noundef 5, i32 noundef 5) #[[ATTR6:[0-9]+]] @@ -158,13 +158,13 @@ define i32 @visible_local_2() { } define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@noalias_args_argmem_rn ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 ; TUNIT-NEXT: ret i32 [[T0]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@noalias_args_argmem_rn ; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR4:[0-9]+]] { ; 
CGSCC-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 @@ -177,7 +177,7 @@ define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { } define i32 @visible_local_3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@visible_local_3 ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -185,7 +185,7 @@ define i32 @visible_local_3() { ; TUNIT-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR4:[0-9]+]] ; TUNIT-NEXT: ret i32 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@visible_local_3 ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -202,18 +202,18 @@ define i32 @visible_local_3() { attributes #0 = { noinline nounwind uwtable willreturn } attributes #1 = { argmemonly noinline nounwind uwtable willreturn} ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind readonly } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree noinline nosync nounwind readonly willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree noinline nosync nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR5]] = { readonly } -; CGSCC: attributes #[[ATTR6]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(argmem: read) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR4]] = { nofree noinline norecurse nosync nounwind 
willreturn memory(argmem: readwrite) uwtable } +; CGSCC: attributes #[[ATTR5]] = { memory(read) } +; CGSCC: attributes #[[ATTR6]] = { willreturn } ; CGSCC: attributes #[[ATTR7]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll index 5574e1ef07cc5..80c2ac7c5eca1 100644 --- a/llvm/test/Transforms/Attributor/internalize.ll +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -135,7 +135,7 @@ define void @unused_arg_caller() { ; CHECK_DISABLED-NEXT: call void @unused_arg(i8 noundef 0) ; CHECK_DISABLED-NEXT: ret void ; -; CHECK_ENABLED: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK_ENABLED: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK_ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller ; CHECK_ENABLED-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK_ENABLED-NEXT: unreachable @@ -164,7 +164,7 @@ define linkonce_odr hidden void @__clang_call_terminate() { ;. ; CHECK_DISABLED: attributes #[[ATTR0]] = { norecurse } ;. -; CHECK_ENABLED: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK_ENABLED: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CHECK_ENABLED: attributes #[[ATTR1]] = { norecurse } -; CHECK_ENABLED: attributes #[[ATTR2:[0-9]+]] = { nounwind readnone } +; CHECK_ENABLED: attributes #[[ATTR2:[0-9]+]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index cad70b04e7943..87b8519c8551f 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -37,7 +37,7 @@ declare i32 @bar() nosync readnone ; CGSCC: @[[P:[a-zA-Z0-9_$"\\.-]+]] = global i8 0 ;. 
define internal i32 @dead_internal_func(i32 %0) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@dead_internal_func ; CGSCC-SAME: () #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: br label [[TMP2:%.*]] @@ -68,13 +68,13 @@ define internal i32 @dead_internal_func(i32 %0) { } define i32 @volatile_load(i32*) norecurse nounwind uwtable { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@volatile_load ; TUNIT-SAME: (i32* nofree noundef align 4 [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP0]], align 4 ; TUNIT-NEXT: ret i32 [[TMP2]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@volatile_load ; CGSCC-SAME: (i32* nofree noundef align 4 [[TMP0:%.*]]) #[[ATTR7:[0-9]+]] { ; CGSCC-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP0]], align 4 @@ -85,7 +85,7 @@ define i32 @volatile_load(i32*) norecurse nounwind uwtable { } define internal i32 @internal_load(i32*) norecurse nounwind uwtable { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@internal_load ; CGSCC-SAME: () #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: ret i32 undef @@ -498,7 +498,7 @@ cleanup: ; FIXME: Should be able to detect undefined behavior. 
define void @ub(i32* %0) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@ub ; TUNIT-SAME: (i32* nocapture nofree writeonly [[TMP0:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: [[POISON:%.*]] = sub nuw i32 0, 1 @@ -507,7 +507,7 @@ define void @ub(i32* %0) { ; TUNIT-NEXT: store i32 0, i32* [[POISON_YET_AGAIN]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@ub ; CGSCC-SAME: (i32* nocapture nofree writeonly [[TMP0:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: [[POISON:%.*]] = sub nuw i32 0, 1 @@ -524,7 +524,7 @@ define void @ub(i32* %0) { } define void @inf_loop() #0 { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@inf_loop ; TUNIT-SAME: () #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -532,7 +532,7 @@ define void @inf_loop() #0 { ; TUNIT: while.body: ; TUNIT-NEXT: br label [[WHILE_BODY]] ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@inf_loop ; CGSCC-SAME: () #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -551,7 +551,7 @@ while.body: ; preds = %entry, %while.body ; FIXME: Detect infloops, and mark affected blocks dead. 
define i32 @test5(i32, i32) #0 { -; CHECK: Function Attrs: nosync readnone +; CHECK: Function Attrs: nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@test5 ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] @@ -588,13 +588,13 @@ cond.end: ; preds = %cond.if, %con } define void @rec() #0 { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@rec ; TUNIT-SAME: () #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@rec ; CGSCC-SAME: () #[[ATTR11:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -2228,7 +2228,7 @@ define i32 @switch_default_caller() { } define internal i32 @switch_default_dead(i64 %i) nounwind { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@switch_default_dead ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: entry: @@ -2255,12 +2255,12 @@ return: } define i32 @switch_default_dead_caller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@switch_default_dead_caller ; TUNIT-SAME: () #[[ATTR11:[0-9]+]] { ; TUNIT-NEXT: ret i32 123 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@switch_default_dead_caller ; CGSCC-SAME: () #[[ATTR11]] { ; CGSCC-NEXT: [[CALL2:%.*]] = tail call noundef i32 @switch_default_dead() #[[ATTR16:[0-9]+]] @@ -2377,7 +2377,7 @@ declare void @use_i32p(i32*) 
; Allow blockaddress users define internal void @dead_with_blockaddress_users(i32* nocapture %pc) nounwind readonly { -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@dead_with_blockaddress_users ; CGSCC-SAME: (i32* nocapture [[PC:%.*]]) #[[ATTR13:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -2422,59 +2422,33 @@ indirectgoto: ; preds = %lab0, %entry @e = global %struct.a* null define i32 @main() { -; TUNIT-LABEL: define {{[^@]+}}@main() { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[F:%.*]] = alloca i32, align 4 -; TUNIT-NEXT: br label [[FOR_COND_0:%.*]] -; TUNIT: for.cond.0: -; TUNIT-NEXT: [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ] -; TUNIT-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100 -; TUNIT-NEXT: br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]] -; TUNIT: for.body.0: -; TUNIT-NEXT: [[INC]] = add nuw nsw i32 [[G_0]], 1 -; TUNIT-NEXT: br label [[FOR_COND_0]] -; TUNIT: for.end.0: -; TUNIT-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 noundef 8) -; TUNIT-NEXT: store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8 -; TUNIT-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a** -; TUNIT-NEXT: store %struct.a* null, %struct.a** [[B]], align 8 -; TUNIT-NEXT: br label [[FOR_COND_1:%.*]] -; TUNIT: for.cond.1: -; TUNIT-NEXT: [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ] -; TUNIT-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100 -; TUNIT-NEXT: br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]] -; TUNIT: for.body.1: -; TUNIT-NEXT: [[CALL4:%.*]] = call i32 (i32*, ...) 
bitcast (i32 (i32)* @h to i32 (i32*, ...)*)(i32* nonnull [[F]]) -; TUNIT-NEXT: [[INC6]] = add nuw nsw i32 [[G_1]], 1 -; TUNIT-NEXT: br label [[FOR_COND_1]] -; TUNIT: for.end.1: -; TUNIT-NEXT: ret i32 0 -; -; CGSCC-LABEL: define {{[^@]+}}@main() { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: br label [[FOR_COND_0:%.*]] -; CGSCC: for.cond.0: -; CGSCC-NEXT: [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ] -; CGSCC-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100 -; CGSCC-NEXT: br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]] -; CGSCC: for.body.0: -; CGSCC-NEXT: [[INC]] = add nuw nsw i32 [[G_0]], 1 -; CGSCC-NEXT: br label [[FOR_COND_0]] -; CGSCC: for.end.0: -; CGSCC-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 noundef 8) -; CGSCC-NEXT: store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8 -; CGSCC-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a** -; CGSCC-NEXT: store %struct.a* null, %struct.a** [[B]], align 8 -; CGSCC-NEXT: br label [[FOR_COND_1:%.*]] -; CGSCC: for.cond.1: -; CGSCC-NEXT: [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ] -; CGSCC-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100 -; CGSCC-NEXT: br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]] -; CGSCC: for.body.1: -; CGSCC-NEXT: [[INC6]] = add nuw nsw i32 [[G_1]], 1 -; CGSCC-NEXT: br label [[FOR_COND_1]] -; CGSCC: for.end.1: -; CGSCC-NEXT: ret i32 0 +; CHECK-LABEL: define {{[^@]+}}@main() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[FOR_COND_0:%.*]] +; CHECK: for.cond.0: +; CHECK-NEXT: [[G_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_0:%.*]] ] +; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[G_0]], 100 +; CHECK-NEXT: br i1 [[CMP_0]], label [[FOR_BODY_0]], label [[FOR_END_0:%.*]] +; CHECK: for.body.0: +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[G_0]], 1 +; CHECK-NEXT: br label [[FOR_COND_0]] +; CHECK: for.end.0: +; CHECK-NEXT: [[CALL:%.*]] = 
call i8* @malloc(i64 noundef 8) +; CHECK-NEXT: store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8 +; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a** +; CHECK-NEXT: store %struct.a* null, %struct.a** [[B]], align 8 +; CHECK-NEXT: br label [[FOR_COND_1:%.*]] +; CHECK: for.cond.1: +; CHECK-NEXT: [[G_1:%.*]] = phi i32 [ 0, [[FOR_END_0]] ], [ [[INC6:%.*]], [[FOR_BODY_1:%.*]] ] +; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[G_1]], 100 +; CHECK-NEXT: br i1 [[CMP_1]], label [[FOR_BODY_1]], label [[FOR_END_1:%.*]] +; CHECK: for.body.1: +; CHECK-NEXT: [[CALL4:%.*]] = call i32 (i32*, ...) bitcast (i32 (i32)* @h to i32 (i32*, ...)*)(i32* nonnull [[F]]) +; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[G_1]], 1 +; CHECK-NEXT: br label [[FOR_COND_1]] +; CHECK: for.end.1: +; CHECK-NEXT: ret i32 0 ; entry: %f = alloca i32 @@ -2513,12 +2487,12 @@ for.end.1: declare noalias i8* @malloc(i64) define i32 @h(i32 %i) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@h ; TUNIT-SAME: (i32 [[I:%.*]]) #[[ATTR11]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@h ; CGSCC-SAME: (i32 [[I:%.*]]) #[[ATTR6]] { ; CGSCC-NEXT: ret i32 0 @@ -2532,7 +2506,7 @@ define i32 @h(i32 %i) { @p = global i8 0 define void @bad_gep() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@bad_gep ; TUNIT-SAME: () #[[ATTR11]] { ; TUNIT-NEXT: entry: @@ -2550,13 +2524,13 @@ define void @bad_gep() { ; TUNIT-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR14]] ; 
TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bad_gep ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[N:%.*]] = alloca i8, align 1 ; CGSCC-NEXT: [[M:%.*]] = alloca i8, align 1 -; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR17:[0-9]+]] +; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR16]] ; CGSCC-NEXT: br label [[EXIT:%.*]] ; CGSCC: while.body: ; CGSCC-NEXT: unreachable @@ -2565,7 +2539,7 @@ define void @bad_gep() { ; CGSCC: if.end: ; CGSCC-NEXT: unreachable ; CGSCC: exit: -; CGSCC-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR17]] +; CGSCC-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 1, i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[N]]) #[[ATTR16]] ; CGSCC-NEXT: ret void ; entry: @@ -2594,7 +2568,7 @@ exit: } define i8 @edge_vs_block_liveness() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@edge_vs_block_liveness ; TUNIT-SAME: () #[[ATTR11]] { ; TUNIT-NEXT: entry: @@ -2605,7 +2579,7 @@ define i8 @edge_vs_block_liveness() { ; TUNIT-NEXT: [[PHI:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ 1, [[B1]] ] ; TUNIT-NEXT: ret i8 1 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@edge_vs_block_liveness ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: entry: @@ -2632,37 +2606,36 @@ declare void 
@llvm.lifetime.start.p0i8(i64 %0, i8* %1) declare void @llvm.lifetime.end.p0i8(i64 %0, i8* %1) ;. ; TUNIT: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { readnone } +; TUNIT: attributes #[[ATTR1:[0-9]+]] = { memory(none) } ; TUNIT: attributes #[[ATTR2]] = { nounwind } ; TUNIT: attributes #[[ATTR3]] = { noreturn nounwind } ; TUNIT: attributes #[[ATTR4]] = { noreturn } -; TUNIT: attributes #[[ATTR5]] = { nosync readnone } -; TUNIT: attributes #[[ATTR6]] = { argmemonly nofree norecurse nounwind willreturn uwtable } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind readnone } -; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR5]] = { nosync memory(none) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR12:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR12:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR13]] = { nounwind willreturn } ; TUNIT: attributes #[[ATTR14]] = { willreturn } ;. 
; CGSCC: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind } -; CGSCC: attributes #[[ATTR1:[0-9]+]] = { readnone } +; CGSCC: attributes #[[ATTR1:[0-9]+]] = { memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nounwind } ; CGSCC: attributes #[[ATTR3]] = { noreturn nounwind } ; CGSCC: attributes #[[ATTR4]] = { noreturn } -; CGSCC: attributes #[[ATTR5]] = { nosync readnone } -; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR7]] = { argmemonly nofree norecurse nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR10]] = { nofree norecurse noreturn nosync nounwind readnone } -; CGSCC: attributes #[[ATTR11]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR5]] = { nosync memory(none) } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { nofree norecurse noreturn nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR13]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR14:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR13]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR14:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } 
; CGSCC: attributes #[[ATTR15]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR16]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR17]] = { willreturn } +; CGSCC: attributes #[[ATTR16]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/liveness_chains.ll b/llvm/test/Transforms/Attributor/liveness_chains.ll index 0f405fa2f7ef4..b7063a02350b0 100644 --- a/llvm/test/Transforms/Attributor/liveness_chains.ll +++ b/llvm/test/Transforms/Attributor/liveness_chains.ll @@ -7,7 +7,7 @@ declare i32 @source() nounwind readonly define i32 @chain_dead(i32 %arg) { -; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@chain_dead ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i32 0 @@ -27,10 +27,10 @@ define i32 @chain_dead(i32 %arg) { } define i32 @chain_alive(i32 %arg) { -; CHECK: Function Attrs: nounwind readonly +; CHECK: Function Attrs: nounwind memory(read) ; CHECK-LABEL: define {{[^@]+}}@chain_alive ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[INIT:%.*]] = call i32 @source() #[[ATTR0]] +; CHECK-NEXT: [[INIT:%.*]] = call i32 @source() #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[V0:%.*]] = add i32 [[ARG]], [[INIT]] ; CHECK-NEXT: [[V1:%.*]] = add i32 [[INIT]], [[V0]] ; CHECK-NEXT: [[V2:%.*]] = add i32 [[V0]], [[V1]] @@ -57,6 +57,7 @@ define i32 @chain_alive(i32 %arg) { ret i32 %v9 } ;. -; CHECK: attributes #[[ATTR0]] = { nounwind readonly } -; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nounwind memory(read) } +; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2]] = { nounwind } ;. 
diff --git a/llvm/test/Transforms/Attributor/lowerheap.ll b/llvm/test/Transforms/Attributor/lowerheap.ll index d3575911415d8..89790cb678b43 100644 --- a/llvm/test/Transforms/Attributor/lowerheap.ll +++ b/llvm/test/Transforms/Attributor/lowerheap.ll @@ -47,6 +47,6 @@ attributes #0 = { nounwind willreturn } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { allockind("free") "alloc-family"="malloc" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CHECK: attributes #[[ATTR5]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll index fe2a6e0f64c52..619a18eba47f5 100644 --- a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll +++ b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define i8 @test1(i32 %a, i32 %length) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test1 ; CHECK-SAME: (i32 [[A:%.*]], i32 [[LENGTH:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -37,7 +37,7 @@ exit: } define i8 @test2(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0]] { ; 
CHECK-NEXT: entry: @@ -185,8 +185,8 @@ declare void @llvm.assume(i1) nounwind declare void @dummy(i1) nounwind declare void @llvm.experimental.guard(i1, ...) ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CHECK: attributes #[[ATTR2]] = { nounwind } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/lvi-for-ashr.ll b/llvm/test/Transforms/Attributor/lvi-for-ashr.ll index ea5618acd201d..23dcc47b943d6 100644 --- a/llvm/test/Transforms/Attributor/lvi-for-ashr.ll +++ b/llvm/test/Transforms/Attributor/lvi-for-ashr.ll @@ -5,7 +5,7 @@ ; FIXME: DOT should be replaced with 3 define i32 @test-ashr(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test-ashr ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: chk65: @@ -49,5 +49,5 @@ return: ret i32 %retval } ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll index 104aa858e183a..ae1084f8abcb8 100644 --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -11,7 +11,7 @@ declare noalias i8* @malloc(i64) inaccessiblememonly ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external dso_local global i32, align 4 ;. 
define dso_local i8* @internal_only(i32 %arg) { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_only ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -26,7 +26,7 @@ entry: } define dso_local i8* @internal_only_rec(i32 %arg) { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_only_rec ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -66,7 +66,7 @@ return: ; preds = %if.end, %if.then } define dso_local i8* @internal_only_rec_static_helper(i32 %arg) { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static_helper ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -79,7 +79,7 @@ entry: } define internal i8* @internal_only_rec_static(i32 %arg) { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_only_rec_static ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -173,7 +173,7 @@ return: ; preds = %if.end, %if.then } define dso_local i8* @internal_argmem_only_read(i32* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_read ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -190,7 +190,7 @@ entry: } define dso_local i8* @internal_argmem_only_write(i32* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_write ; 
CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -205,14 +205,14 @@ entry: } define dso_local i8* @internal_argmem_only_rec(i32* %arg) { -; TUNIT: Function Attrs: inaccessiblemem_or_argmemonly +; TUNIT: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@internal_argmem_only_rec ; TUNIT-SAME: (i32* nocapture nofree [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL:%.*]] = call noalias i8* @internal_argmem_only_rec_1(i32* nocapture nofree align 4 [[ARG]]) ; TUNIT-NEXT: ret i8* [[CALL]] ; -; CGSCC: Function Attrs: inaccessiblemem_or_argmemonly +; CGSCC: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@internal_argmem_only_rec ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -225,7 +225,7 @@ entry: } define internal i8* @internal_argmem_only_rec_1(i32* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_rec_1 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -281,7 +281,7 @@ return: ; preds = %if.end3, %if.then2, } define internal i8* @internal_argmem_only_rec_2(i32* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@internal_argmem_only_rec_2 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -303,7 +303,7 @@ declare i8* @inaccesible_argmem_only_decl(i8* %arg) inaccessiblemem_or_argmemonl declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) 
nounwind argmemonly willreturn define void @callerA1(i8* %arg) { -; CHECK: Function Attrs: argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@callerA1 ; CHECK-SAME: (i8* [[ARG:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* [[ARG]]) @@ -313,7 +313,7 @@ define void @callerA1(i8* %arg) { ret void } define void @callerA2(i8* %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@callerA2 ; CHECK-SAME: (i8* [[ARG:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* [[ARG]]) @@ -323,7 +323,7 @@ define void @callerA2(i8* %arg) { ret void } define void @callerB1() { -; CHECK: Function Attrs: readnone +; CHECK: Function Attrs: memory(none) ; CHECK-LABEL: define {{[^@]+}}@callerB1 ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[STACK:%.*]] = alloca i8, align 1 @@ -335,7 +335,7 @@ define void @callerB1() { ret void } define void @callerB2() { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@callerB2 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[STACK:%.*]] = alloca i8, align 1 @@ -388,7 +388,7 @@ define void @callerD2() { } define void @callerE(i8* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@callerE ; CHECK-SAME: (i8* nocapture nofree readnone [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: ret void @@ -399,7 +399,7 @@ define void @callerE(i8* %arg) { define void @write_global() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@write_global ; 
CHECK-SAME: () #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: store i32 0, i32* @G, align 4 @@ -409,7 +409,7 @@ define void @write_global() { ret void } define void @write_global_via_arg(i32* %GPtr) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@write_global_via_arg ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[GPTR:%.*]]) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: store i32 0, i32* [[GPTR]], align 4 @@ -419,7 +419,7 @@ define void @write_global_via_arg(i32* %GPtr) { ret void } define internal void @write_global_via_arg_internal(i32* %GPtr) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@write_global_via_arg_internal ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: store i32 0, i32* @G, align 4 @@ -430,13 +430,13 @@ define internal void @write_global_via_arg_internal(i32* %GPtr) { } define void @writeonly_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@writeonly_global ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: call void @write_global() #[[ATTR10:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@writeonly_global ; CGSCC-SAME: () #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: call void @write_global() #[[ATTR11:[0-9]+]] @@ -446,13 +446,13 @@ define void @writeonly_global() { ret void } define void @writeonly_global_via_arg() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: 
nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@writeonly_global_via_arg ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: call void @write_global_via_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) @G) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@writeonly_global_via_arg ; CGSCC-SAME: () #[[ATTR8]] { ; CGSCC-NEXT: call void @write_global_via_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) @G) #[[ATTR11]] @@ -464,13 +464,13 @@ define void @writeonly_global_via_arg() { define void @writeonly_global_via_arg_internal() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@writeonly_global_via_arg_internal ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: call void @write_global_via_arg_internal() #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@writeonly_global_via_arg_internal ; CGSCC-SAME: () #[[ATTR8]] { ; CGSCC-NEXT: call void @write_global_via_arg_internal() #[[ATTR11]] @@ -481,7 +481,7 @@ define void @writeonly_global_via_arg_internal() { } define i8 @recursive_not_readnone(i8* %ptr, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_not_readnone ; TUNIT-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -493,7 +493,7 @@ define i8 @recursive_not_readnone(i8* %ptr, i1 %c) { ; 
TUNIT-NEXT: store i8 1, i8* [[PTR]], align 1 ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_not_readnone ; CGSCC-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -517,7 +517,7 @@ f: } define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_not_readnone_internal ; TUNIT-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -529,7 +529,7 @@ define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) { ; TUNIT-NEXT: store i8 1, i8* [[PTR]], align 1 ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_not_readnone_internal ; CGSCC-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -553,14 +553,14 @@ f: } define i8 @readnone_caller(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@readnone_caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i8, align 1 ; TUNIT-NEXT: [[R:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) #[[ATTR11]] ; TUNIT-NEXT: ret i8 [[R]] ; -; CGSCC: Function 
Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@readnone_caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -573,7 +573,7 @@ define i8 @readnone_caller(i1 %c) { } define internal i8 @recursive_readnone_internal2(i8* %ptr, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_readnone_internal2 ; TUNIT-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -585,7 +585,7 @@ define internal i8 @recursive_readnone_internal2(i8* %ptr, i1 %c) { ; TUNIT-NEXT: store i8 1, i8* [[PTR]], align 1 ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_readnone_internal2 ; CGSCC-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -609,13 +609,13 @@ f: } define i8 @readnone_caller2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@readnone_caller2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR9]] { ; TUNIT-NEXT: [[R:%.*]] = call i8 @recursive_readnone_internal2(i8* undef, i1 [[C]]) #[[ATTR11]] ; TUNIT-NEXT: ret i8 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@readnone_caller2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[R:%.*]] = call i8 @recursive_readnone_internal2(i8* undef, i1 [[C]]) #[[ATTR13]] @@ -626,7 +626,7 @@ define i8 
@readnone_caller2(i1 %c) { } define internal i8 @recursive_not_readnone_internal3(i8* %ptr, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_not_readnone_internal3 ; TUNIT-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -638,7 +638,7 @@ define internal i8 @recursive_not_readnone_internal3(i8* %ptr, i1 %c) { ; TUNIT-NEXT: store i8 1, i8* [[PTR]], align 1 ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_not_readnone_internal3 ; CGSCC-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -662,14 +662,14 @@ f: } define i8 @readnone_caller3(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@readnone_caller3 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR9]] { ; TUNIT-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; TUNIT-NEXT: [[R:%.*]] = call i8 @recursive_not_readnone_internal3(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 [[C]]) #[[ATTR11]] ; TUNIT-NEXT: ret i8 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@readnone_caller3 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 @@ -682,7 +682,7 @@ define i8 @readnone_caller3(i1 %c) { } define internal void @argmemonly_before_ipconstprop(i32* %p) argmemonly { -; CHECK: Function 
Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@argmemonly_before_ipconstprop ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: store i32 0, i32* @G, align 4 @@ -693,13 +693,13 @@ define internal void @argmemonly_before_ipconstprop(i32* %p) argmemonly { } define void @argmemonky_caller() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@argmemonky_caller ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: call void @argmemonly_before_ipconstprop() #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@argmemonky_caller ; CGSCC-SAME: () #[[ATTR8]] { ; CGSCC-NEXT: call void @argmemonly_before_ipconstprop() #[[ATTR11]] @@ -709,31 +709,31 @@ define void @argmemonky_caller() { ret void } ;. 
-; TUNIT: attributes #[[ATTR0]] = { inaccessiblememonly } -; TUNIT: attributes #[[ATTR1]] = { inaccessiblemem_or_argmemonly } -; TUNIT: attributes #[[ATTR2]] = { readnone } -; TUNIT: attributes #[[ATTR3]] = { argmemonly } -; TUNIT: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR8]] = { argmemonly nofree nosync nounwind writeonly } -; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind writeonly } +; TUNIT: attributes #[[ATTR0]] = { memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR2]] = { memory(none) } +; TUNIT: attributes #[[ATTR3]] = { memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR8]] = { nofree nosync nounwind memory(argmem: write) } +; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { inaccessiblememonly } -; CGSCC: attributes #[[ATTR1]] = { inaccessiblemem_or_argmemonly } -; CGSCC: attributes #[[ATTR2]] = { readnone } -; CGSCC: attributes #[[ATTR3]] = { argmemonly } -; CGSCC: attributes #[[ATTR4:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR8]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR11]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR13]] = { nounwind writeonly } +; CGSCC: attributes #[[ATTR0]] = { memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR2]] = { memory(none) } +; CGSCC: attributes #[[ATTR3]] = { memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR8]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR9]] = { nofree nosync nounwind memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind memory(none) } +; CGSCC: attributes 
#[[ATTR11]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR12]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR13]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index c6a09901fb946..44e7f41abb58a 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -67,7 +67,7 @@ define void @external(void (i8*)* %fp) { ; CGSCC-SAME: (void (i8*)* [[FP:%.*]]) { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 -; CGSCC-NEXT: call void @foo(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) #[[ATTR1]] +; CGSCC-NEXT: call void @foo(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) #[[ATTR2:[0-9]+]] ; CGSCC-NEXT: call void @callback1(void (i32*)* noundef nonnull @foo) ; CGSCC-NEXT: call void @callback2(void (i8*)* noundef bitcast (void (i32*)* @foo to void (i8*)*)) ; CGSCC-NEXT: call void @callback2(void (i8*)* [[FP]]) @@ -93,7 +93,7 @@ entry: define internal void @foo(i32* %a) { ; -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -108,9 +108,10 @@ entry: declare void @callback1(void (i32*)*) declare void @callback2(void (i8*)*) ;. -; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/misc_crash.ll b/llvm/test/Transforms/Attributor/misc_crash.ll index ab4fceafe39fb..b28bf102e6bed 100644 --- a/llvm/test/Transforms/Attributor/misc_crash.ll +++ b/llvm/test/Transforms/Attributor/misc_crash.ll @@ -9,7 +9,7 @@ ; CHECK: @[[VAR2:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0 ;. define i32 addrspace(1)* @foo(i32 addrspace(4)* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32 addrspace(4)* nofree readnone [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -22,7 +22,7 @@ entry: } define i32* @func1() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@func1 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i32* getelementptr inbounds ([1 x i32], [1 x i32]* @var1, i32 0, i32 0) @@ -37,7 +37,7 @@ define internal i32* @func1a([1 x i32]* %arg) { } define internal void @func2a(i32* %0) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@func2a ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: store i32 0, i32* @var2, align 4 @@ -118,7 +118,7 @@ define i16 @foo3() { ret i16 %call 
} define internal i16 @bar3(i16* %p1, i16 %p2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bar3 ; CHECK-SAME: (i16* nocapture nofree readnone [[P1:%.*]], i16 returned [[P2:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i16 [[P2]] @@ -130,7 +130,7 @@ define internal i16 @bar3(i16* %p1, i16 %p2) { ; CHECK-SAME: (i8*) declare void @func6(i8*) ;. -; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) } ; CHECK: attributes #[[ATTR2]] = { norecurse } ;. diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index 84140995e4c8a..dcb93fe46cc33 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -43,7 +43,7 @@ define i8* @return_noalias(){ } define void @nocapture(i8* %a){ -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nocapture ; CHECK-SAME: (i8* nocapture nofree readnone [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret void @@ -159,7 +159,7 @@ declare i8* @baz(...) nounwind uwtable ; Returning global pointer. Should not be noalias. 
define i8** @getter() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@getter ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i8** @G @@ -169,12 +169,12 @@ define i8** @getter() { ; Returning global pointer. Should not be noalias. define i8** @calle1(){ -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@calle1 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i8** @G ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@calle1 ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[TMP1:%.*]] = call noundef nonnull align 8 dereferenceable(8) i8** @getter() #[[ATTR11:[0-9]+]] @@ -520,7 +520,7 @@ define void @test13_use_alias(){ ; TEST 14 i2p casts define internal i32 @p2i(i32* %arg) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@p2i ; CHECK-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 @@ -531,22 +531,22 @@ define internal i32 @p2i(i32* %arg) { } define i32 @i2p(i32* %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@i2p ; TUNIT-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR4:[0-9]+]] { -; TUNIT-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) #[[ATTR9:[0-9]+]] +; TUNIT-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) #[[ATTR10:[0-9]+]] ; TUNIT-NEXT: 
[[I2P:%.*]] = inttoptr i32 [[C]] to i8* ; TUNIT-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; TUNIT-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) #[[ATTR10:[0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) #[[ATTR10]] ; TUNIT-NEXT: ret i32 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@i2p ; CGSCC-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) #[[ATTR11]] ; CGSCC-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* ; CGSCC-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; CGSCC-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[BC]]) #[[ATTR12:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[BC]]) #[[ATTR11]] ; CGSCC-NEXT: ret i32 [[CALL]] ; %c = call i32 @p2i(i32* %arg) @@ -556,13 +556,13 @@ define i32 @i2p(i32* %arg) { ret i32 %call } define internal i32 @ret(i32* %arg) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@ret ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 ; TUNIT-NEXT: ret i32 [[L]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@ret ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) 
#[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 @@ -599,7 +599,7 @@ define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 ; CGSCC-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture nofree noundef nonnull align 8 dereferenceable(240) [[TMP0]]) #[[ATTR13:[0-9]+]] +; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture nofree noundef nonnull align 8 dereferenceable(240) [[TMP0]]) #[[ATTR12:[0-9]+]] ; CGSCC-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) ; CGSCC-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) ; CGSCC-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) @@ -652,13 +652,13 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) @alias_of_p = external global i32* define void @make_alias(i32* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@make_alias ; TUNIT-SAME: (i32* nofree writeonly [[P:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@make_alias ; CGSCC-SAME: (i32* nofree writeonly [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 @@ -669,13 +669,13 @@ define void 
@make_alias(i32* %p) { } define void @only_store(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@only_store ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: store i32 0, i32* [[P]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@only_store ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: store i32 0, i32* [[P]], align 4 @@ -686,28 +686,28 @@ define void @only_store(i32* %p) { } define void @test15_caller(i32* noalias %p, i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test15_caller ; TUNIT-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: -; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree 
nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test15_caller ; CGSCC-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CGSCC: if.then: -; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13:[0-9]+]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: -; CGSCC-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR13]] ; CGSCC-NEXT: ret void ; %tobool = icmp eq i32 %c, 0 @@ -743,32 +743,32 @@ if.end: ; Therefore, only one of the two conditions of if statementes will be fulfilled. define internal void @test16_sub(i32* noalias %p, i32 %c1, i32 %c2) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test16_sub ; TUNIT-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; TUNIT: if.then: -; TUNIT-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) #[[ATTR12]] -; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] +; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: ; TUNIT-NEXT: [[TOBOOL1:%.*]] = 
icmp eq i32 [[C2]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] ; TUNIT: if.then2: -; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[IF_END3]] ; TUNIT: if.end3: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test16_sub ; CGSCC-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CGSCC: if.then: -; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] -; CGSCC-NEXT: tail call void @make_alias(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: tail call void @make_alias(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: ; CGSCC-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 @@ -800,16 +800,16 @@ if.end3: } define void @test16_caller(i32* %p, i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test16_caller ; TUNIT-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR7]] { -; TUNIT-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @test16_sub(i32* noalias nofree 
writeonly [[P]], i32 [[C]], i32 [[C]]) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test16_caller ; CGSCC-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR10]] { -; CGSCC-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) #[[ATTR13]] ; CGSCC-NEXT: ret void ; tail call void @test16_sub(i32* %p, i32 %c, i32 %c) @@ -836,32 +836,32 @@ define void @test16_caller(i32* %p, i32 %c) { ; } define void @test17_caller(i32* noalias %p, i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test17_caller ; TUNIT-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; TUNIT: l1: -; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[L3:%.*]] ; TUNIT: l2: -; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[L3]] ; TUNIT: l3: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test17_caller ; CGSCC-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: entry: ; 
CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; CGSCC: l1: -; CGSCC-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[L3:%.*]] ; CGSCC: l2: -; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[L3]] ; CGSCC: l3: ; CGSCC-NEXT: ret void @@ -894,12 +894,12 @@ l3: ; } define void @noreturn() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@noreturn -; TUNIT-SAME: () #[[ATTR9]] { +; TUNIT-SAME: () #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@noreturn ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: ret void @@ -909,30 +909,30 @@ define void @noreturn() { } define void @test18_caller(i32* noalias %p, i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test18_caller ; TUNIT-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; TUNIT-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; TUNIT: l1: -; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) #[[ATTR10]] ; TUNIT-NEXT: br label [[L2]] ; 
TUNIT: l2: -; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR12]] +; TUNIT-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test18_caller ; CGSCC-SAME: (i32* noalias nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]], i32 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 ; CGSCC-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] ; CGSCC: l1: -; CGSCC-NEXT: tail call void @make_alias(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @make_alias(i32* nofree nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: br label [[L2]] ; CGSCC: l2: -; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR14]] +; CGSCC-NEXT: tail call void @only_store(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P]]) #[[ATTR13]] ; CGSCC-NEXT: ret void ; entry: @@ -949,33 +949,32 @@ l2: ret void } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR1]] = { nounwind uwtable } ; TUNIT: attributes #[[ATTR2]] = { nounwind } ; TUNIT: attributes #[[ATTR3]] = { nounwind ssp uwtable } -; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR6:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR8]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR11]] = { willreturn } -; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR11]] = { willreturn memory(readwrite) } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR1]] = { nounwind uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR3]] = { nounwind } ; CGSCC: attributes #[[ATTR4]] = { nounwind ssp uwtable } -; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR6]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR7:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR11]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR12]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR13]] = { willreturn } -; CGSCC: attributes #[[ATTR14]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR11]] = { willreturn } +; CGSCC: attributes #[[ATTR12]] = { willreturn 
memory(readwrite) } +; CGSCC: attributes #[[ATTR13]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR14]] = { nounwind willreturn memory(write) } ;. diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 49c17d61575fc..2e0f702792fd0 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -11,7 +11,7 @@ ; CHECK: @[[G3:[a-zA-Z0-9_$"\\.-]+]] = global i8* null ;. define i32* @c1(i32* %q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@c1 ; CHECK-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32* [[Q]] @@ -21,7 +21,7 @@ define i32* @c1(i32* %q) { ; It would also be acceptable to mark %q as readnone. Update @c3 too. define void @c2(i32* %q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@c2 ; CHECK-SAME: (i32* nofree writeonly [[Q:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: store i32* [[Q]], i32** @g, align 8 @@ -32,16 +32,16 @@ define void @c2(i32* %q) { } define void @c3(i32* %q) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@c3 ; TUNIT-SAME: (i32* nofree writeonly [[Q:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: call void @c2(i32* nofree writeonly [[Q]]) #[[ATTR14:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@c3 ; CGSCC-SAME: (i32* nofree writeonly [[Q:%.*]]) #[[ATTR2:[0-9]+]] { -; 
CGSCC-NEXT: call void @c2(i32* nofree writeonly [[Q]]) #[[ATTR17:[0-9]+]] +; CGSCC-NEXT: call void @c2(i32* nofree writeonly [[Q]]) #[[ATTR14:[0-9]+]] ; CGSCC-NEXT: ret void ; call void @c2(i32* %q) @@ -49,7 +49,7 @@ define void @c3(i32* %q) { } define i1 @c4(i32* %q, i32 %bitno) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@c4 ; CHECK-SAME: (i32* nofree readnone [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -73,7 +73,7 @@ l1: ; c4b is c4 but without the escaping part define i1 @c4b(i32* %q, i32 %bitno) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@c4b ; CHECK-SAME: (i32* nocapture nofree readnone [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -98,7 +98,7 @@ l1: @lookup_table = global [2 x i1] [ i1 0, i1 1 ] define i1 @c5(i32* %q, i32 %bitno) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@c5 ; TUNIT-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -108,7 +108,7 @@ define i1 @c5(i32* %q, i32 %bitno) { ; TUNIT-NEXT: [[VAL:%.*]] = load i1, i1* [[LOOKUP]], align 1 ; TUNIT-NEXT: ret i1 [[VAL]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@c5 ; CGSCC-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -130,10 
+130,10 @@ define i1 @c5(i32* %q, i32 %bitno) { declare void @throw_if_bit_set(i8*, i8) readonly define i1 @c6(i8* %q, i8 %bit) personality i32 (...)* @__gxx_personality_v0 { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@c6 ; TUNIT-SAME: (i8* readonly [[Q:%.*]], i8 [[BIT:%.*]]) #[[ATTR4:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { -; TUNIT-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) #[[ATTR3:[0-9]+]] +; TUNIT-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) ; TUNIT-NEXT: to label [[RET0:%.*]] unwind label [[RET1:%.*]] ; TUNIT: ret0: ; TUNIT-NEXT: ret i1 false @@ -142,10 +142,10 @@ define i1 @c6(i8* %q, i8 %bit) personality i32 (...)* @__gxx_personality_v0 { ; TUNIT-NEXT: cleanup ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@c6 ; CGSCC-SAME: (i8* readonly [[Q:%.*]], i8 [[BIT:%.*]]) #[[ATTR5:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 { -; CGSCC-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: invoke void @throw_if_bit_set(i8* readonly [[Q]], i8 [[BIT]]) ; CGSCC-NEXT: to label [[RET0:%.*]] unwind label [[RET1:%.*]] ; CGSCC: ret0: ; CGSCC-NEXT: ret i1 false @@ -167,7 +167,7 @@ ret1: declare i32 @__gxx_personality_v0(...) 
define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@lookup_bit ; CHECK-SAME: (i32* nofree readnone [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP:%.*]] = ptrtoint i32* [[Q]] to i32 @@ -184,17 +184,17 @@ define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind { } define i1 @c7(i32* %q, i32 %bitno) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@c7 ; TUNIT-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR15:[0-9]+]] ; TUNIT-NEXT: [[VAL:%.*]] = load i1, i1* [[PTR]], align 1 ; TUNIT-NEXT: ret i1 [[VAL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@c7 ; CGSCC-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR6:[0-9]+]] { -; CGSCC-NEXT: [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR18:[0-9]+]] +; CGSCC-NEXT: [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR17:[0-9]+]] ; CGSCC-NEXT: [[VAL:%.*]] = load i1, i1* [[PTR]], align 1 ; CGSCC-NEXT: ret i1 [[VAL]] ; @@ -292,13 +292,13 @@ define void @nc2(i32* %p, i32* %q) { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@nc2 ; TUNIT-SAME: (i32* nocapture nofree [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR5]] { -; TUNIT-NEXT: [[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree [[P]], i1 noundef false) #[[ATTR16:[0-9]+]] +; TUNIT-NEXT: 
[[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree [[P]], i1 noundef false) #[[ATTR14]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@nc2 ; CGSCC-SAME: (i32* nocapture nofree align 4 [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR8:[0-9]+]] { -; CGSCC-NEXT: [[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree align 4 [[P]], i1 noundef false) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: [[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree align 4 [[P]], i1 noundef false) #[[ATTR14]] ; CGSCC-NEXT: ret void ; %1 = call i32 @nc1(i32* %q, i32* %p, i1 0) ; [#uses=0] @@ -320,16 +320,16 @@ define void @nc3(void ()* %p) { ; FIXME: readonly and nocapture missing on the pointer. declare void @external(i8* readonly) nounwind argmemonly define void @nc4(i8* %p) { -; TUNIT: Function Attrs: argmemonly nounwind +; TUNIT: Function Attrs: nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@nc4 ; TUNIT-SAME: (i8* [[P:%.*]]) #[[ATTR6:[0-9]+]] { -; TUNIT-NEXT: call void @external(i8* readonly [[P]]) #[[ATTR17:[0-9]+]] +; TUNIT-NEXT: call void @external(i8* readonly [[P]]) #[[ATTR16:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nounwind +; CGSCC: Function Attrs: nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@nc4 ; CGSCC-SAME: (i8* [[P:%.*]]) #[[ATTR9:[0-9]+]] { -; CGSCC-NEXT: call void @external(i8* readonly [[P]]) #[[ATTR19:[0-9]+]] +; CGSCC-NEXT: call void @external(i8* readonly [[P]]) #[[ATTR18:[0-9]+]] ; CGSCC-NEXT: ret void ; call void @external(i8* %p) @@ -349,17 +349,17 @@ define void @nc5(void (i8*)* %f, i8* %p) { ; It would be acceptable to add readnone to %y1_1 and %y1_2. 
define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test1_1 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X1_1:%.*]], i8* nocapture nofree readnone [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR7:[0-9]+]] { -; TUNIT-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) #[[ATTR7]] +; TUNIT-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) #[[ATTR17:[0-9]+]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test1_1 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X1_1:%.*]], i8* nocapture nofree readnone [[Y1_1:%.*]], i1 [[C:%.*]]) #[[ATTR10:[0-9]+]] { -; CGSCC-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP1:%.*]] = call i8* @test1_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[Y1_1]], i1 [[C]]) #[[ATTR19:[0-9]+]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: ret void ; @@ -369,23 +369,23 @@ define void @test1_1(i8* %x1_1, i8* %y1_1, i1 %c) { } define i8* @test1_2(i8* %x1_2, i8* %y1_2, i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test1_2 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X1_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: br 
i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 noundef [[C]]) #[[ATTR7]] +; TUNIT-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 noundef [[C]]) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: br label [[F]] ; TUNIT: f: ; TUNIT-NEXT: ret i8* [[Y1_2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test1_2 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X1_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y1_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 noundef [[C]]) #[[ATTR10]] +; CGSCC-NEXT: call void @test1_1(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone [[Y1_2]], i1 noundef [[C]]) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: br label [[F]] ; CGSCC: f: @@ -401,17 +401,17 @@ f: } define void @test2(i8* %x2) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test2 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X2:%.*]]) #[[ATTR7]] { -; TUNIT-NEXT: call void @test2(i8* noalias nocapture nofree readnone undef) #[[ATTR7]] +; TUNIT-NEXT: call void @test2(i8* noalias nocapture nofree readnone undef) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test2 ; CGSCC-SAME: 
(i8* nocapture nofree readnone [[X2:%.*]]) #[[ATTR10]] { -; CGSCC-NEXT: call void @test2(i8* noalias nocapture nofree readnone undef) #[[ATTR10]] +; CGSCC-NEXT: call void @test2(i8* noalias nocapture nofree readnone undef) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: ret void ; @@ -421,17 +421,17 @@ define void @test2(i8* %x2) { } define void @test3(i8* %x3, i8* %y3, i8* %z3) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test3 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X3:%.*]], i8* nocapture nofree readnone [[Y3:%.*]], i8* nocapture nofree readnone [[Z3:%.*]]) #[[ATTR7]] { -; TUNIT-NEXT: call void @test3(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef) #[[ATTR7]] +; TUNIT-NEXT: call void @test3(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test3 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X3:%.*]], i8* nocapture nofree readnone [[Y3:%.*]], i8* nocapture nofree readnone [[Z3:%.*]]) #[[ATTR10]] { -; CGSCC-NEXT: call void @test3(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef) #[[ATTR10]] +; CGSCC-NEXT: call void @test3(i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef, i8* noalias nocapture nofree readnone undef) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: ret void ; @@ -441,17 +441,17 @@ define void @test3(i8* %x3, i8* %y3, i8* %z3) { } define void @test4_1(i8* 
%x4_1, i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test4_1 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR7]] { -; TUNIT-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) #[[ATTR7]] +; TUNIT-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test4_1 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X4_1:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { -; CGSCC-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP1:%.*]] = call i8* @test4_2(i8* noalias nocapture nofree readnone undef, i8* noalias nofree readnone "no-capture-maybe-returned" [[X4_1]], i8* noalias nocapture nofree readnone undef, i1 [[C]]) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: ret void ; @@ -461,23 +461,23 @@ define void @test4_1(i8* %x4_1, i1 %c) { } define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2, i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind writeonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test4_2 ; TUNIT-SAME: (i8* nocapture nofree readnone [[X4_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y4_2:%.*]], i8* nocapture 
nofree readnone [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR7]] { ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 4294967296 null, i1 noundef [[C]]) #[[ATTR7]] +; TUNIT-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 4294967296 null, i1 noundef [[C]]) #[[ATTR17]] ; TUNIT-NEXT: store i32* null, i32** @g, align 8 ; TUNIT-NEXT: br label [[F]] ; TUNIT: f: ; TUNIT-NEXT: ret i8* [[Y4_2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind writeonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test4_2 ; CGSCC-SAME: (i8* nocapture nofree readnone [[X4_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y4_2:%.*]], i8* nocapture nofree readnone [[Z4_2:%.*]], i1 [[C:%.*]]) #[[ATTR10]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 4294967296 null, i1 noundef [[C]]) #[[ATTR10]] +; CGSCC-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 4294967296 null, i1 noundef [[C]]) #[[ATTR19]] ; CGSCC-NEXT: store i32* null, i32** @g, align 8 ; CGSCC-NEXT: br label [[F]] ; CGSCC: f: @@ -521,13 +521,13 @@ define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) { } define void @test_cmpxchg(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test_cmpxchg ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: [[TMP1:%.*]] = cmpxchg i32* [[P]], i32 0, i32 1 acquire monotonic, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn 
memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test_cmpxchg ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR11:[0-9]+]] { ; CGSCC-NEXT: [[TMP1:%.*]] = cmpxchg i32* [[P]], i32 0, i32 1 acquire monotonic, align 4 @@ -538,13 +538,13 @@ define void @test_cmpxchg(i32* %p) { } define void @test_cmpxchg_ptr(i32** %p, i32* %q) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test_cmpxchg_ptr ; TUNIT-SAME: (i32** nocapture nofree noundef nonnull dereferenceable(8) [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[TMP1:%.*]] = cmpxchg i32** [[P]], i32* null, i32* [[Q]] acquire monotonic, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test_cmpxchg_ptr ; CGSCC-SAME: (i32** nocapture nofree noundef nonnull dereferenceable(8) [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR11]] { ; CGSCC-NEXT: [[TMP1:%.*]] = cmpxchg i32** [[P]], i32* null, i32* [[Q]] acquire monotonic, align 8 @@ -555,13 +555,13 @@ define void @test_cmpxchg_ptr(i32** %p, i32* %q) { } define void @test_atomicrmw(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test_atomicrmw ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: [[TMP1:%.*]] = atomicrmw add i32* [[P]], i32 1 seq_cst, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test_atomicrmw ; 
CGSCC-SAME: (i32* nocapture nofree noundef nonnull dereferenceable(4) [[P:%.*]]) #[[ATTR11]] { ; CGSCC-NEXT: [[TMP1:%.*]] = atomicrmw add i32* [[P]], i32 1 seq_cst, align 4 @@ -572,7 +572,7 @@ define void @test_atomicrmw(i32* %p) { } define void @test_volatile(i32* %x) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test_volatile ; TUNIT-SAME: (i32* nofree align 4 [[X:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: entry: @@ -580,7 +580,7 @@ define void @test_volatile(i32* %x) { ; TUNIT-NEXT: store volatile i32 0, i32* [[GEP]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test_volatile ; CGSCC-SAME: (i32* nofree align 4 [[X:%.*]]) #[[ATTR11]] { ; CGSCC-NEXT: entry: @@ -607,7 +607,7 @@ define void @nocaptureLaunder(i8* %p) { ; CGSCC-LABEL: define {{[^@]+}}@nocaptureLaunder ; CGSCC-SAME: (i8* nocapture nofree [[P:%.*]]) #[[ATTR7]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR20:[0-9]+]] +; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR17]] ; CGSCC-NEXT: store i8 42, i8* [[B]], align 1 ; CGSCC-NEXT: ret void ; @@ -629,7 +629,7 @@ define void @captureLaunder(i8* %p) { ; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@captureLaunder ; CGSCC-SAME: (i8* nofree [[P:%.*]]) #[[ATTR7]] { -; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR20]] +; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR17]] ; CGSCC-NEXT: store i8* [[B]], i8** @g2, align 8 ; CGSCC-NEXT: ret void ; @@ -639,19 +639,19 @@ define void 
@captureLaunder(i8* %p) { } define void @nocaptureStrip(i8* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@nocaptureStrip ; TUNIT-SAME: (i8* nocapture nofree writeonly [[P:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR19:[0-9]+]] +; TUNIT-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]] ; TUNIT-NEXT: store i8 42, i8* [[B]], align 1 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@nocaptureStrip ; CGSCC-SAME: (i8* nocapture nofree writeonly [[P:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]] +; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR17]] ; CGSCC-NEXT: store i8 42, i8* [[B]], align 1 ; CGSCC-NEXT: ret void ; @@ -663,17 +663,17 @@ entry: @g3 = global i8* null define void @captureStrip(i8* %p) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@captureStrip ; TUNIT-SAME: (i8* nofree writeonly [[P:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR19]] +; TUNIT-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]] ; TUNIT-NEXT: store i8* [[B]], i8** @g3, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync 
nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@captureStrip ; CGSCC-SAME: (i8* nofree writeonly [[P:%.*]]) #[[ATTR1]] { -; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]] +; CGSCC-NEXT: [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR17]] ; CGSCC-NEXT: store i8* [[B]], i8** @g3, align 8 ; CGSCC-NEXT: ret void ; @@ -683,7 +683,7 @@ define void @captureStrip(i8* %p) { } define i1 @captureICmp(i32* %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@captureICmp ; CHECK-SAME: (i32* nofree readnone [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32* [[X]], null @@ -694,7 +694,7 @@ define i1 @captureICmp(i32* %x) { } define i1 @captureICmpRev(i32* %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@captureICmpRev ; CHECK-SAME: (i32* nofree readnone [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32* null, [[X]] @@ -705,7 +705,7 @@ define i1 @captureICmpRev(i32* %x) { } define i1 @nocaptureInboundsGEPICmp(i32* %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nocaptureInboundsGEPICmp ; CHECK-SAME: (i32* nocapture nofree readnone [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i1 false @@ -717,7 +717,7 @@ define i1 @nocaptureInboundsGEPICmp(i32* %x) { } define i1 @nocaptureInboundsGEPICmpRev(i32* %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: 
nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nocaptureInboundsGEPICmpRev ; CHECK-SAME: (i32* nocapture nofree readnone [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i1 true @@ -729,7 +729,7 @@ define i1 @nocaptureInboundsGEPICmpRev(i32* %x) { } define i1 @nocaptureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nocaptureDereferenceableOrNullICmp ; CHECK-SAME: (i32* nocapture nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[X]] to i8* @@ -742,14 +742,14 @@ define i1 @nocaptureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x } define i1 @captureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) null_pointer_is_valid { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@captureDereferenceableOrNullICmp ; TUNIT-SAME: (i32* nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[TMP1:%.*]] = bitcast i32* [[X]] to i8* ; TUNIT-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null ; TUNIT-NEXT: ret i1 [[TMP2]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@captureDereferenceableOrNullICmp ; CGSCC-SAME: (i32* nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR12:[0-9]+]] { ; CGSCC-NEXT: [[TMP1:%.*]] = bitcast i32* [[X]] to i8* @@ -776,16 +776,16 @@ entry: declare i8* @unknownpi8pi8(i8*,i8* returned) define i8* @test_returned1(i8* %A, 
i8* returned %B) nounwind readonly { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@test_returned1 -; TUNIT-SAME: (i8* nocapture readonly [[A:%.*]], i8* readonly returned [[B:%.*]]) #[[ATTR4]] { +; TUNIT-SAME: (i8* nocapture [[A:%.*]], i8* returned [[B:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[P:%.*]] = call i8* @unknownpi8pi8(i8* [[A]], i8* [[B]]) ; TUNIT-NEXT: ret i8* [[P]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@test_returned1 -; CGSCC-SAME: (i8* nocapture readonly [[A:%.*]], i8* readonly returned [[B:%.*]]) #[[ATTR5]] { +; CGSCC-SAME: (i8* nocapture [[A:%.*]], i8* returned [[B:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[P:%.*]] = call i8* @unknownpi8pi8(i8* [[A]], i8* [[B]]) ; CGSCC-NEXT: ret i8* [[P]] @@ -796,14 +796,14 @@ entry: } define i8* @test_returned2(i8* %A, i8* %B) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@test_returned2 ; TUNIT-SAME: (i8* readonly [[A:%.*]], i8* readonly [[B:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[P:%.*]] = call i8* @unknownpi8pi8(i8* readonly [[A]], i8* readonly [[B]]) #[[ATTR4]] ; TUNIT-NEXT: ret i8* [[P]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@test_returned2 ; CGSCC-SAME: (i8* readonly [[A:%.*]], i8* readonly [[B:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: @@ -843,46 +843,44 @@ define void @ptr_uses(i8* %ptr, i8* %wptr) { declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR3]] = { readonly } -; TUNIT: attributes #[[ATTR4]] = { nounwind readonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR3:[0-9]+]] = { memory(read) } +; TUNIT: attributes #[[ATTR4]] = { nounwind memory(read) } ; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR6]] = { argmemonly nounwind } -; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind writeonly } -; TUNIT: attributes #[[ATTR8]] = { argmemonly nofree norecurse nounwind willreturn } -; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; TUNIT: attributes #[[ATTR10:[0-9]+]] = { nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR6]] = { nounwind memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind memory(write) } +; TUNIT: attributes #[[ATTR8]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; TUNIT: attributes #[[ATTR10:[0-9]+]] = { nounwind willreturn memory(read) } ; TUNIT: attributes #[[ATTR11]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR12:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn } -; TUNIT: attributes #[[ATTR13:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable 
willreturn } -; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR15]] = { nofree nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR16]] = { nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR17]] = { nounwind } +; TUNIT: attributes #[[ATTR12:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR13:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR15]] = { nofree nounwind willreturn } +; TUNIT: attributes #[[ATTR16]] = { nounwind } +; TUNIT: attributes #[[ATTR17]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR18]] = { willreturn } -; TUNIT: attributes #[[ATTR19]] = { readnone willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR4]] = { readonly } -; CGSCC: attributes #[[ATTR5]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { memory(read) } +; CGSCC: attributes #[[ATTR5]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind 
willreturn memory(read) } ; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR8]] = { nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nounwind } -; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind writeonly } -; CGSCC: attributes #[[ATTR11]] = { argmemonly nofree norecurse nounwind willreturn } -; CGSCC: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; CGSCC: attributes #[[ATTR13:[0-9]+]] = { nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR9]] = { nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind memory(write) } +; CGSCC: attributes #[[ATTR11]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; CGSCC: attributes #[[ATTR13:[0-9]+]] = { nounwind willreturn memory(read) } ; CGSCC: attributes #[[ATTR14]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR15:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind speculatable willreturn } -; CGSCC: attributes #[[ATTR16:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } -; CGSCC: attributes #[[ATTR17]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR18]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR19]] = { nounwind } -; CGSCC: attributes #[[ATTR20]] = { willreturn } +; CGSCC: attributes #[[ATTR15:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR16:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CGSCC: attributes #[[ATTR17]] = { willreturn } +; CGSCC: attributes #[[ATTR18]] = { nounwind } +; CGSCC: attributes #[[ATTR19]] = { nofree nosync nounwind } ;. 
diff --git a/llvm/test/Transforms/Attributor/nocapture-2.ll b/llvm/test/Transforms/Attributor/nocapture-2.ll index f54b15ba6c5d4..ea6fc175b97b2 100644 --- a/llvm/test/Transforms/Attributor/nocapture-2.ll +++ b/llvm/test/Transforms/Attributor/nocapture-2.ll @@ -16,7 +16,7 @@ declare i32* @unknown() ; ; no-capture is missing on %p because it is not dereferenceable define i32 @is_null_return(i32* %p) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@is_null_return ; CHECK-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -42,7 +42,7 @@ entry: ; ; no-capture is missing on %p because it is not dereferenceable define i32 @is_null_control(i32* %p) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@is_null_control ; CHECK-SAME: (i32* nofree [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -99,7 +99,7 @@ return: ; preds = %if.end3, %if.then2, ; } ; define double* @srec0(double* %a) #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@srec0 ; CHECK-SAME: (double* nocapture nofree readnone [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -124,7 +124,7 @@ entry: ; Other arguments are possible here due to the no-return behavior. 
; define i32* @srec16(i32* %a) #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@srec16 ; CHECK-SAME: (i32* nocapture nofree readnone [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -164,27 +164,49 @@ entry: ; return scc_A((int*)(scc_A(a) ? scc_B((double*)a) : scc_C(a))); ; } define float* @scc_A(i32* dereferenceable_or_null(4) %a) { -; CHECK: Function Attrs: nofree nosync nounwind readnone -; CHECK-LABEL: define {{[^@]+}}@scc_A -; CHECK-SAME: (i32* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32* [[A]], null -; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.true: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i16* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32* -; CHECK-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32* -; CHECK-NEXT: br label [[COND_END:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i32* [ [[TMP3]], [[COND_TRUE]] ], [ [[A]], [[COND_FALSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[COND]] to float* -; CHECK-NEXT: ret float* [[TMP4]] 
+; TUNIT: Function Attrs: nofree nosync nounwind memory(none) +; TUNIT-LABEL: define {{[^@]+}}@scc_A +; TUNIT-SAME: (i32* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i32* [[A]], null +; TUNIT-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; TUNIT: cond.true: +; TUNIT-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i16* +; TUNIT-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR9:[0-9]+]] +; TUNIT-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to double* +; TUNIT-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32* +; TUNIT-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32* +; TUNIT-NEXT: br label [[COND_END:%.*]] +; TUNIT: cond.false: +; TUNIT-NEXT: br label [[COND_END]] +; TUNIT: cond.end: +; TUNIT-NEXT: [[COND:%.*]] = phi i32* [ [[TMP3]], [[COND_TRUE]] ], [ [[A]], [[COND_FALSE]] ] +; TUNIT-NEXT: [[TMP4:%.*]] = bitcast i32* [[COND]] to float* +; TUNIT-NEXT: ret float* [[TMP4]] +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) +; CGSCC-LABEL: define {{[^@]+}}@scc_A +; CGSCC-SAME: (i32* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne i32* [[A]], null +; CGSCC-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CGSCC: cond.true: +; CGSCC-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i16* +; CGSCC-NEXT: 
[[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR10:[0-9]+]] +; CGSCC-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to double* +; CGSCC-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32* +; CGSCC-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32* +; CGSCC-NEXT: br label [[COND_END:%.*]] +; CGSCC: cond.false: +; CGSCC-NEXT: br label [[COND_END]] +; CGSCC: cond.end: +; CGSCC-NEXT: [[COND:%.*]] = phi i32* [ [[TMP3]], [[COND_TRUE]] ], [ [[A]], [[COND_FALSE]] ] +; CGSCC-NEXT: [[TMP4:%.*]] = bitcast i32* [[COND]] to float* +; CGSCC-NEXT: ret float* [[TMP4]] ; entry: %tobool = icmp ne i32* %a, null @@ -211,27 +233,49 @@ cond.end: ; preds = %cond.false, %cond.t ; FIXME: the call1 below to scc_B should return dereferenceable_or_null(8) (as the callee does). Something prevented that deduction and needs to be investigated. 
define i64* @scc_B(double* dereferenceable_or_null(8) %a) { -; CHECK: Function Attrs: nofree nosync nounwind readnone -; CHECK-LABEL: define {{[^@]+}}@scc_B -; CHECK-SAME: (double* nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne double* [[A]], null -; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.true: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i32* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[CALL]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i16* -; CHECK-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]] -; CHECK-NEXT: br label [[COND_END:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[A]] to i8* -; CHECK-NEXT: br label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i8* [ [[CALL2]], [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[COND]] to i64* -; CHECK-NEXT: ret i64* [[TMP4]] +; TUNIT: Function Attrs: nofree nosync nounwind memory(none) +; TUNIT-LABEL: define {{[^@]+}}@scc_B +; TUNIT-SAME: (double* nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne double* [[A]], null +; TUNIT-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; TUNIT: cond.true: +; TUNIT-NEXT: [[TMP0:%.*]] = 
bitcast double* [[A]] to i32* +; TUNIT-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP1:%.*]] = bitcast float* [[CALL]] to double* +; TUNIT-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i16* +; TUNIT-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR9]] +; TUNIT-NEXT: br label [[COND_END:%.*]] +; TUNIT: cond.false: +; TUNIT-NEXT: [[TMP3:%.*]] = bitcast double* [[A]] to i8* +; TUNIT-NEXT: br label [[COND_END]] +; TUNIT: cond.end: +; TUNIT-NEXT: [[COND:%.*]] = phi i8* [ [[CALL2]], [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +; TUNIT-NEXT: [[TMP4:%.*]] = bitcast i8* [[COND]] to i64* +; TUNIT-NEXT: ret i64* [[TMP4]] +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) +; CGSCC-LABEL: define {{[^@]+}}@scc_B +; CGSCC-SAME: (double* nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne double* [[A]], null +; CGSCC-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CGSCC: cond.true: +; CGSCC-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i32* +; CGSCC-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP1:%.*]] = bitcast float* [[CALL]] to double* +; CGSCC-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP2:%.*]] = 
bitcast i64* [[CALL1]] to i16* +; CGSCC-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR10]] +; CGSCC-NEXT: br label [[COND_END:%.*]] +; CGSCC: cond.false: +; CGSCC-NEXT: [[TMP3:%.*]] = bitcast double* [[A]] to i8* +; CGSCC-NEXT: br label [[COND_END]] +; CGSCC: cond.end: +; CGSCC-NEXT: [[COND:%.*]] = phi i8* [ [[CALL2]], [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +; CGSCC-NEXT: [[TMP4:%.*]] = bitcast i8* [[COND]] to i64* +; CGSCC-NEXT: ret i64* [[TMP4]] ; entry: %tobool = icmp ne double* %a, null @@ -257,29 +301,53 @@ cond.end: ; preds = %cond.false, %cond.t } define i8* @scc_C(i16* dereferenceable_or_null(2) %a) { -; CHECK: Function Attrs: nofree nosync nounwind readnone -; CHECK-LABEL: define {{[^@]+}}@scc_C -; CHECK-SAME: (i16* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[BC:%.*]] = bitcast i16* [[A]] to i32* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[BC]]) #[[ATTR2]] -; CHECK-NEXT: [[BC2:%.*]] = bitcast float* [[CALL]] to i8* -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8* [[BC2]], null -; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -; CHECK: cond.true: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8* -; CHECK-NEXT: br label [[COND_END:%.*]] -; CHECK: cond.false: -; CHECK-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A]]) #[[ATTR2]] -; CHECK-NEXT: br 
label [[COND_END]] -; CHECK: cond.end: -; CHECK-NEXT: [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[COND]] to i32* -; CHECK-NEXT: [[CALL3:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8* -; CHECK-NEXT: ret i8* [[TMP3]] +; TUNIT: Function Attrs: nofree nosync nounwind memory(none) +; TUNIT-LABEL: define {{[^@]+}}@scc_C +; TUNIT-SAME: (i16* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[BC:%.*]] = bitcast i16* [[A]] to i32* +; TUNIT-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[BC]]) #[[ATTR9]] +; TUNIT-NEXT: [[BC2:%.*]] = bitcast float* [[CALL]] to i8* +; TUNIT-NEXT: [[TOBOOL:%.*]] = icmp ne i8* [[BC2]], null +; TUNIT-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; TUNIT: cond.true: +; TUNIT-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to double* +; TUNIT-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8* +; TUNIT-NEXT: br label [[COND_END:%.*]] +; TUNIT: cond.false: +; TUNIT-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: br label [[COND_END]] +; TUNIT: cond.end: +; TUNIT-NEXT: [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ] +; TUNIT-NEXT: [[TMP2:%.*]] = bitcast i8* [[COND]] to i32* +; TUNIT-NEXT: [[CALL3:%.*]] = call dereferenceable_or_null(4) float* 
@scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR9]] +; TUNIT-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8* +; TUNIT-NEXT: ret i8* [[TMP3]] +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) +; CGSCC-LABEL: define {{[^@]+}}@scc_C +; CGSCC-SAME: (i16* nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[BC:%.*]] = bitcast i16* [[A]] to i32* +; CGSCC-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[BC]]) #[[ATTR10]] +; CGSCC-NEXT: [[BC2:%.*]] = bitcast float* [[CALL]] to i8* +; CGSCC-NEXT: [[TOBOOL:%.*]] = icmp ne i8* [[BC2]], null +; CGSCC-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +; CGSCC: cond.true: +; CGSCC-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to double* +; CGSCC-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8* +; CGSCC-NEXT: br label [[COND_END:%.*]] +; CGSCC: cond.false: +; CGSCC-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[A]]) #[[ATTR10]] +; CGSCC-NEXT: br label [[COND_END]] +; CGSCC: cond.end: +; CGSCC-NEXT: [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ] +; CGSCC-NEXT: [[TMP2:%.*]] = bitcast i8* [[COND]] to i32* +; CGSCC-NEXT: [[CALL3:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR10]] +; CGSCC-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8* +; CGSCC-NEXT: ret i8* [[TMP3]] ; entry: %bc = bitcast i16* %a to i32* @@ -360,7 +428,7 @@ 
declare i32 @printf(i8* nocapture, ...) ; ; There should *not* be a no-capture attribute on %a define i64* @not_captured_but_returned_0(i64* %a) #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; CHECK-LABEL: define {{[^@]+}}@not_captured_but_returned_0 ; CHECK-SAME: (i64* nofree noundef nonnull returned writeonly align 8 dereferenceable(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: @@ -381,7 +449,7 @@ entry: ; ; There should *not* be a no-capture attribute on %a define i64* @not_captured_but_returned_1(i64* %a) #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; CHECK-LABEL: define {{[^@]+}}@not_captured_but_returned_1 ; CHECK-SAME: (i64* nofree nonnull writeonly align 8 dereferenceable(16) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -403,20 +471,20 @@ entry: ; } ; define void @test_not_captured_but_returned_calls(i64* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@test_not_captured_but_returned_calls ; TUNIT-SAME: (i64* nocapture nofree writeonly align 8 [[A:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9:[0-9]+]] -; TUNIT-NEXT: [[CALL1:%.*]] = call i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call i64* 
@not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10:[0-9]+]] +; TUNIT-NEXT: [[CALL1:%.*]] = call i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@test_not_captured_but_returned_calls ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR10:[0-9]+]] -; CGSCC-NEXT: [[CALL1:%.*]] = call i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR11:[0-9]+]] +; CGSCC-NEXT: [[CALL1:%.*]] = call i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: ret void ; entry: @@ -433,18 +501,18 @@ entry: ; ; There should *not* be a no-capture attribute on %a define i64* @negative_test_not_captured_but_returned_call_0a(i64* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0a ; TUNIT-SAME: (i64* nofree returned writeonly align 8 "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i64* 
@not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret i64* [[A]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0a ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: ret i64* [[CALL]] ; entry: @@ -460,20 +528,20 @@ entry: ; ; There should *not* be a no-capture attribute on %a define void @negative_test_not_captured_but_returned_call_0b(i64* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0b ; TUNIT-SAME: (i64* nofree writeonly align 8 [[A:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: [[TMP0:%.*]] = ptrtoint i64* 
[[A]] to i64 ; TUNIT-NEXT: store i64 [[TMP0]], i64* [[A]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0b ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: [[TMP0:%.*]] = ptrtoint i64* [[CALL]] to i64 ; CGSCC-NEXT: store i64 [[TMP0]], i64* [[A]], align 8 ; CGSCC-NEXT: ret void @@ -493,18 +561,18 @@ entry: ; ; There should *not* be a no-capture attribute on %a define i64* @negative_test_not_captured_but_returned_call_1a(i64* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_1a ; TUNIT-SAME: (i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret i64* [[CALL]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind 
willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable ; CGSCC-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_1a ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call noundef nonnull align 8 dereferenceable(8) i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: ret i64* [[CALL]] ; entry: @@ -520,20 +588,20 @@ entry: ; ; There should *not* be a no-capture attribute on %a define void @negative_test_not_captured_but_returned_call_1b(i64* %a) #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn writeonly uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(write) uwtable ; TUNIT-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_1b ; TUNIT-SAME: (i64* nofree writeonly align 8 [[A:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call align 8 i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]] +; TUNIT-NEXT: [[CALL:%.*]] = call align 8 i64* @not_captured_but_returned_1(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: [[TMP0:%.*]] = ptrtoint i64* [[CALL]] to i64 ; TUNIT-NEXT: store i64 [[TMP0]], i64* [[CALL]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn writeonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(write) uwtable ; CGSCC-LABEL: define 
{{[^@]+}}@negative_test_not_captured_but_returned_call_1b ; CGSCC-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call align 8 i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call align 8 i64* @not_captured_but_returned_1(i64* nofree noundef nonnull writeonly align 8 dereferenceable(16) [[A]]) #[[ATTR11]] ; CGSCC-NEXT: [[TMP0:%.*]] = ptrtoint i64* [[CALL]] to i64 ; CGSCC-NEXT: store i64 [[TMP0]], i64* [[CALL]], align 8 ; CGSCC-NEXT: ret void @@ -619,18 +687,18 @@ r: declare i32* @readonly_unknown(i32*, i32*) readonly define void @not_captured_by_readonly_call(i32* %b) #0 { -; TUNIT: Function Attrs: noinline nounwind readonly uwtable +; TUNIT: Function Attrs: noinline nounwind memory(read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call ; TUNIT-SAME: (i32* nocapture readonly [[B:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[B]]) #[[ATTR6:[0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[B]]) ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: noinline nounwind readonly uwtable +; CGSCC: Function Attrs: noinline nounwind memory(read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call ; CGSCC-SAME: (i32* nocapture readonly [[B:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[B]]) #[[ATTR7:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[B]]) ; CGSCC-NEXT: ret void ; entry: @@ -644,18 +712,18 @@ entry: ; Make sure the returned flag on %r is strong enough to justify nocapture on %b but **not** on %r. 
; define i32* @not_captured_by_readonly_call_not_returned_either1(i32* %b, i32* returned %r) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either1 ; TUNIT-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly returned [[R:%.*]]) #[[ATTR8:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR8]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR11:[0-9]+]] ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either1 ; CGSCC-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly returned [[R:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR9]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR12:[0-9]+]] ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -665,18 +733,18 @@ entry: declare i32* @readonly_unknown_r1a(i32*, i32* returned) readonly define i32* @not_captured_by_readonly_call_not_returned_either2(i32* %b, i32* %r) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either2 ; TUNIT-SAME: (i32* readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR8]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR11]] ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nounwind readonly +; 
CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either2 ; CGSCC-SAME: (i32* readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR9]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR12]] ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -686,18 +754,18 @@ entry: declare i32* @readonly_unknown_r1b(i32*, i32* returned) readonly nounwind define i32* @not_captured_by_readonly_call_not_returned_either3(i32* %b, i32* %r) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either3 ; TUNIT-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR8]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR11]] ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either3 ; CGSCC-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR9]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR12]] ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -706,18 +774,18 @@ entry: } define i32* @not_captured_by_readonly_call_not_returned_either4(i32* %b, i32* %r) nounwind { -; TUNIT: Function Attrs: nounwind readonly 
+; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either4 ; TUNIT-SAME: (i32* readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR8]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR6]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@not_captured_by_readonly_call_not_returned_either4 ; CGSCC-SAME: (i32* readonly [[B:%.*]], i32* readonly [[R:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR7]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -743,19 +811,12 @@ entry: declare i32* @readonly_i32p(i32*) readonly define void @nocapture_is_not_subsumed_2(i32* nocapture %b) { -; TUNIT-LABEL: define {{[^@]+}}@nocapture_is_not_subsumed_2 -; TUNIT-SAME: (i32* nocapture [[B:%.*]]) { -; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @readonly_i32p(i32* readonly [[B]]) #[[ATTR6]] -; TUNIT-NEXT: store i32 0, i32* [[CALL]], align 4 -; TUNIT-NEXT: ret void -; -; CGSCC-LABEL: define {{[^@]+}}@nocapture_is_not_subsumed_2 -; CGSCC-SAME: (i32* nocapture [[B:%.*]]) { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @readonly_i32p(i32* readonly [[B]]) #[[ATTR7]] -; CGSCC-NEXT: store i32 0, i32* [[CALL]], align 4 -; CGSCC-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@nocapture_is_not_subsumed_2 +; CHECK-SAME: (i32* nocapture [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32* @readonly_i32p(i32* readonly [[B]]) +; CHECK-NEXT: store i32 0, i32* [[CALL]], align 4 +; 
CHECK-NEXT: ret void ; entry: %call = call i32* @readonly_i32p(i32* %b) @@ -765,26 +826,30 @@ entry: attributes #0 = { noinline nounwind uwtable } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind readnone } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind memory(none) } ; TUNIT: attributes #[[ATTR3]] = { noinline nounwind uwtable } -; TUNIT: attributes #[[ATTR4]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; TUNIT: attributes #[[ATTR5]] = { nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; TUNIT: attributes #[[ATTR6]] = { readonly } -; TUNIT: attributes #[[ATTR7]] = { noinline nounwind readonly uwtable } -; TUNIT: attributes #[[ATTR8]] = { nounwind readonly } -; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR4]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; TUNIT: attributes #[[ATTR5]] = { nofree noinline norecurse nosync nounwind willreturn memory(write) uwtable } +; TUNIT: attributes #[[ATTR6:[0-9]+]] = { memory(read) } +; TUNIT: attributes #[[ATTR7]] = { noinline nounwind memory(read) uwtable } +; TUNIT: attributes #[[ATTR8]] = { nounwind memory(read) } +; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind } +; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR11]] = { nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind memory(none) } ; CGSCC: attributes #[[ATTR3]] = { noinline nounwind uwtable } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree noinline norecurse nosync nounwind willreturn writeonly uwtable } -; CGSCC: attributes #[[ATTR5]] = { argmemonly nofree noinline nosync nounwind willreturn writeonly uwtable } -; CGSCC: attributes #[[ATTR6]] = { nofree noinline nosync nounwind willreturn writeonly uwtable } -; CGSCC: attributes #[[ATTR7]] = { readonly } -; CGSCC: attributes #[[ATTR8]] = { noinline nounwind readonly uwtable } -; CGSCC: attributes #[[ATTR9]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR10]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR4]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: write) uwtable } +; CGSCC: attributes #[[ATTR5]] = { nofree noinline nosync nounwind willreturn memory(argmem: write) uwtable } +; CGSCC: attributes #[[ATTR6]] = { nofree noinline nosync nounwind willreturn memory(write) uwtable } +; CGSCC: attributes #[[ATTR7:[0-9]+]] = { memory(read) } +; CGSCC: attributes #[[ATTR8]] = { noinline nounwind memory(read) uwtable } +; CGSCC: attributes #[[ATTR9]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR10]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR11]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR12]] = { nounwind } ;. 
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll index 41355326dc910..c22fcfde8a71b 100644 --- a/llvm/test/Transforms/Attributor/nodelete.ll +++ b/llvm/test/Transforms/Attributor/nodelete.ll @@ -6,13 +6,13 @@ %"b" = type { i8 } define hidden i64 @f1() align 2 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i64 undef ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 { ; CGSCC-NEXT: entry: @@ -27,7 +27,7 @@ entry: } define internal i64 @f2(%"a"* %this) align 2 { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f2 ; CGSCC-SAME: () #[[ATTR0]] align 2 { ; CGSCC-NEXT: entry: @@ -43,7 +43,7 @@ entry: } define internal void @f3(%"b"* %this) align 2 { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f3 ; CGSCC-SAME: () #[[ATTR0]] align 2 { ; CGSCC-NEXT: entry: @@ -58,7 +58,7 @@ entry: } define internal i1 @f4(%"b"* %this) align 2 { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f4 ; CGSCC-SAME: () #[[ATTR0]] align 2 { ; CGSCC-NEXT: entry: @@ -73,7 +73,7 @@ entry: } define internal %"a"* @f5(%"b"* %this) align 2 { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f5 ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] align 2 { ; CGSCC-NEXT: entry: @@ -87,9 +87,9 @@ entry: ret %"a"* %0 } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll index 13bfde5277b86..9eb23ab793d12 100644 --- a/llvm/test/Transforms/Attributor/nofree.ll +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -15,7 +15,7 @@ declare void @_ZdaPv(i8*) local_unnamed_addr #2 ; TEST 1 (positive case) define void @only_return() #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@only_return ; CHECK-SAME: () #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: ret void @@ -104,12 +104,12 @@ end: define void @mutual_recursion1() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@mutual_recursion1 ; TUNIT-SAME: () #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind 
willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@mutual_recursion1 ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: ret void @@ -119,12 +119,12 @@ define void @mutual_recursion1() #0 { } define void @mutual_recursion2() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@mutual_recursion2 ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@mutual_recursion2 ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: ret void @@ -182,17 +182,17 @@ define noalias i8* @call_realloc(i8* nocapture %0, i64 %1) local_unnamed_addr #0 ; Call function declaration with "nofree" -; CHECK: Function Attrs: nofree noinline nounwind readnone uwtable +; CHECK: Function Attrs: nofree noinline nounwind memory(none) uwtable ; CHECK-NEXT: declare void @nofree_function() declare void @nofree_function() nofree readnone #0 define void @call_nofree_function() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@call_nofree_function ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@call_nofree_function ; CGSCC-SAME: () #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: ret void @@ -240,12 +240,12 @@ define void @call_both() #0 { ; TEST 10 (positive case) ; Call intrinsic function -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone speculatable 
willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; CHECK-NEXT: declare float @llvm.floor.f32(float) declare float @llvm.floor.f32(float) define void @call_floor(float %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@call_floor ; CHECK-SAME: (float [[A:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: ret void @@ -255,7 +255,7 @@ define void @call_floor(float %a) #0 { } define float @call_floor2(float %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@call_floor2 ; CHECK-SAME: (float [[A:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[C:%.*]] = tail call float @llvm.floor.f32(float [[A]]) #[[ATTR11:[0-9]+]] @@ -269,12 +269,12 @@ define float @call_floor2(float %a) #0 { ; Check propagation. 
define void @f1() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: () #[[ATTR5]] { ; CGSCC-NEXT: ret void @@ -284,12 +284,12 @@ define void @f1() #0 { } define void @f2() #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@f2 ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@f2 ; CGSCC-SAME: () #[[ATTR5]] { ; CGSCC-NEXT: ret void @@ -357,7 +357,7 @@ define void @nonnull_assume_pos(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_pos ; CHECK-SAME: (i8* nofree [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* nofree [[ARG3:%.*]], i8* [[ARG4:%.*]]) { -; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR12:[0-9]+]] [ "nofree"(i8* [[ARG1]]), "nofree"(i8* [[ARG3]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR11]] [ "nofree"(i8* [[ARG1]]), "nofree"(i8* [[ARG3]]) ] ; CHECK-NEXT: call void @unknown(i8* nofree [[ARG1]], i8* [[ARG2]], i8* nofree [[ARG3]], i8* [[ARG4]]) ; CHECK-NEXT: ret void ; @@ -440,28 +440,26 @@ attributes #2 = { nobuiltin nounwind } ; TUNIT: attributes #[[ATTR0]] = { nounwind } ; TUNIT: attributes #[[ATTR1]] = { noinline nounwind uwtable } ; TUNIT: attributes #[[ATTR2]] = { nobuiltin 
nounwind } -; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nofree noinline nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nofree noinline nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; TUNIT: attributes #[[ATTR7]] = { nofree nounwind } ; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nobuiltin nofree nounwind } -; TUNIT: attributes #[[ATTR9:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; TUNIT: attributes #[[ATTR10:[0-9]+]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR11]] = { readnone willreturn } -; TUNIT: attributes #[[ATTR12]] = { willreturn } +; TUNIT: attributes #[[ATTR11]] = { willreturn } ;. 
; CGSCC: attributes #[[ATTR0]] = { nounwind } ; CGSCC: attributes #[[ATTR1]] = { noinline nounwind uwtable } ; CGSCC: attributes #[[ATTR2]] = { nobuiltin nounwind } -; CGSCC: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nofree noinline nounwind readnone uwtable } -; CGSCC: attributes #[[ATTR5]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CGSCC: attributes #[[ATTR3]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nofree noinline nounwind memory(none) uwtable } +; CGSCC: attributes #[[ATTR5]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CGSCC: attributes #[[ATTR7]] = { nofree nounwind } ; CGSCC: attributes #[[ATTR8:[0-9]+]] = { nobuiltin nofree nounwind } -; CGSCC: attributes #[[ATTR9:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CGSCC: attributes #[[ATTR10:[0-9]+]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR11]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR12]] = { willreturn } +; CGSCC: attributes #[[ATTR11]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index 3bea83442d24d..5a1e37cc73133 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -20,7 +20,7 @@ define i8* @test1() { ; Return a pointer trivially nonnull (argument attribute) define i8* @test2(i8* nonnull %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i8* nofree nonnull readnone returned "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i8* [[P]] @@ -29,7 +29,7 @@ define i8* @test2(i8* nonnull %p) { } define i8* @test2A(i1 %c, i8* %ret) { -; CHECK: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test2A ; CHECK-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] @@ -50,7 +50,7 @@ B: } define i8* @test2B(i1 %c, i8* %ret) { -; CHECK: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test2B ; CHECK-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] @@ -106,12 +106,12 @@ define i8* @test3(i1 %c) { ; nonnull if neither can ever return null. (In this case, they ; just never return period.) 
define i8* @test4_helper() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4_helper ; TUNIT-SAME: () #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: ret i8* undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4_helper ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i8* undef @@ -121,12 +121,12 @@ define i8* @test4_helper() { } define i8* @test4() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4 ; TUNIT-SAME: () #[[ATTR3]] { ; TUNIT-NEXT: ret i8* undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i8* undef @@ -138,7 +138,7 @@ define i8* @test4() { ; Given a mutual recursive set of functions which *can* return null ; make sure we haven't marked them as nonnull. 
define i8* @test5_helper(i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test5_helper ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] @@ -147,7 +147,7 @@ define i8* @test5_helper(i1 %c) { ; TUNIT: end: ; TUNIT-NEXT: ret i8* null ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test5_helper ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] @@ -165,12 +165,12 @@ end: } define i8* @test5(i1 %c) { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test5 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: ret i8* null ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test5 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: ret i8* null @@ -237,7 +237,7 @@ exit: } define i8* @test7(i8* %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test7 ; CHECK-SAME: (i8* nofree readnone returned "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret i8* [[A]] @@ -247,7 +247,7 @@ define i8* @test7(i8* %a) { } define i8* @test8(i8* %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test8 ; CHECK-SAME: (i8* nofree 
readnone "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 1 @@ -258,7 +258,7 @@ define i8* @test8(i8* %a) { } define i8* @test9(i8* %a, i64 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test9 ; CHECK-SAME: (i8* nofree readnone "no-capture-maybe-returned" [[A:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[N]] @@ -271,7 +271,7 @@ define i8* @test9(i8* %a, i64 %n) { ; ATTRIBUTOR_OPM: define i8* @test10 ; ATTRIBUTOR_NPM: define nonnull i8* @test10 define i8* @test10(i8* %a, i64 %n) { -; CHECK: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test10 ; CHECK-SAME: (i8* nofree readnone "no-capture-maybe-returned" [[A:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[N]], 0 @@ -391,7 +391,7 @@ declare nonnull i8* @nonnull() define internal i32* @f1(i32* %arg) { ; FIXME: missing nonnull It should be nonnull @f1(i32* nonnull readonly %arg) -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind readonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: bb: @@ -413,7 +413,7 @@ define internal i32* @f1(i32* %arg) { ; TUNIT-NEXT: [[TMP10:%.*]] = phi i32* [ [[TMP5C]], [[BB4]] ], [ inttoptr (i64 4 to i32*), [[BB:%.*]] ] ; TUNIT-NEXT: ret i32* [[TMP10]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: (i32* nofree readonly 
[[ARG:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -461,14 +461,14 @@ bb9: ; preds = %bb4, %bb } define internal i32* @f2(i32* %arg) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind readonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@f2 ; TUNIT-SAME: (i32* nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: [[TMP:%.*]] = tail call i32* @f1(i32* nofree readonly [[ARG]]) #[[ATTR14]] ; TUNIT-NEXT: ret i32* [[TMP]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@f2 ; CGSCC-SAME: (i32* nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: bb: @@ -482,14 +482,14 @@ bb: define dso_local noalias i32* @f3(i32* %arg) { ; FIXME: missing nonnull. It should be nonnull @f3(i32* nonnull readonly %arg) -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind readonly +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@f3 ; TUNIT-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: bb: ; TUNIT-NEXT: [[TMP:%.*]] = call i32* @f1(i32* nofree readonly [[ARG]]) #[[ATTR14]] ; TUNIT-NEXT: ret i32* [[TMP]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@f3 ; CGSCC-SAME: (i32* nofree readonly [[ARG:%.*]]) #[[ATTR5]] { ; CGSCC-NEXT: bb: @@ -857,7 +857,7 @@ define i8 @parent6(i8* %a, i8* %b) { define i8 @parent7(i8* %a) { ; CHECK-LABEL: define {{[^@]+}}@parent7 ; CHECK-SAME: (i8* nonnull [[A:%.*]]) { -; CHECK-NEXT: [[RET:%.*]] = call i8 @use1safecall(i8* nonnull readonly [[A]]) #[[ATTR15:[0-9]+]] +; CHECK-NEXT: [[RET:%.*]] = call i8 @use1safecall(i8* nonnull readonly [[A]]) #[[ATTR13]] ; CHECK-NEXT: call void 
@use1nonnull(i8* nonnull [[A]]) ; CHECK-NEXT: ret i8 [[RET]] ; @@ -915,7 +915,7 @@ exc: } define i32* @gep1(i32* %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@gep1 ; CHECK-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32* [[P]], i32 1 @@ -927,13 +927,13 @@ define i32* @gep1(i32* %p) { define i32* @gep1_no_null_opt(i32* %p) #0 { ; Should't be able to derive nonnull based on gep. -; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@gep1_no_null_opt ; TUNIT-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32* [[P]], i32 1 ; TUNIT-NEXT: ret i32* [[Q]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@gep1_no_null_opt ; CGSCC-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32* [[P]], i32 1 @@ -944,7 +944,7 @@ define i32* @gep1_no_null_opt(i32* %p) #0 { } define i32 addrspace(3)* @gep2(i32 addrspace(3)* %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@gep2 ; CHECK-SAME: (i32 addrspace(3)* nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[Q:%.*]] = getelementptr inbounds i32, i32 addrspace(3)* [[P]], 
i32 1 @@ -956,7 +956,7 @@ define i32 addrspace(3)* @gep2(i32 addrspace(3)* %p) { ; FIXME: We should propagate dereferenceable here but *not* nonnull define i32 addrspace(3)* @as(i32 addrspace(3)* dereferenceable(4) %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@as ; CHECK-SAME: (i32 addrspace(3)* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret i32 addrspace(3)* [[P]] @@ -966,7 +966,7 @@ define i32 addrspace(3)* @as(i32 addrspace(3)* dereferenceable(4) %p) { ; CHECK-NOT: @g2() define internal i32* @g2() { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@g2 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i32* inttoptr (i64 4 to i32*) @@ -975,15 +975,15 @@ define internal i32* @g2() { } define i32* @g1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@g1 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32* inttoptr (i64 4 to i32*) ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@g1 ; CGSCC-SAME: () #[[ATTR9:[0-9]+]] { -; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull align 4 i32* @g2() #[[ATTR16:[0-9]+]] +; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull align 4 i32* @g2() #[[ATTR13]] ; CGSCC-NEXT: ret i32* [[C]] ; %c = call i32* @g2() @@ -1391,10 +1391,10 @@ declare i8* @strrchr(i8* %0, i32 %1) nofree nounwind readonly willreturn ; We should not mark the return of @strrchr as `nonnull`, it may well be NULL! 
define i8* @mybasename(i8* nofree readonly %str) { -; CHECK: Function Attrs: nofree nounwind readonly willreturn +; CHECK: Function Attrs: nofree nounwind willreturn memory(read) ; CHECK-LABEL: define {{[^@]+}}@mybasename ; CHECK-SAME: (i8* nofree readonly [[STR:%.*]]) #[[ATTR12:[0-9]+]] { -; CHECK-NEXT: [[CALL:%.*]] = call i8* @strrchr(i8* nofree readonly [[STR]], i32 noundef 47) #[[ATTR15]] +; CHECK-NEXT: [[CALL:%.*]] = call i8* @strrchr(i8* nofree readonly [[STR]], i32 noundef 47) #[[ATTR13]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8* [[CALL]], null ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 1 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i8* [[ADD_PTR]], i8* [[STR]] @@ -1486,7 +1486,7 @@ declare void @use_i8_ptr(i8* nofree nocapture readnone) nounwind declare void @use_i8_ptr_ret(i8* nofree nocapture readnone) nounwind willreturn define i8* @nonnull_function_ptr_1() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nonnull_function_ptr_1 ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: ret i8* bitcast (i8* ()* @nonnull_function_ptr_1 to i8*) @@ -1497,7 +1497,7 @@ define i8* @nonnull_function_ptr_1() { declare i8* @function_decl() define i8* @nonnull_function_ptr_2() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nonnull_function_ptr_2 ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: ret i8* bitcast (i8* ()* @function_decl to i8*) @@ -1522,38 +1522,35 @@ declare void @nonnull_callee(i8* nonnull %p) attributes #0 = { null_pointer_is_valid } attributes #1 = { nounwind willreturn} ;. 
-; TUNIT: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR4]] = { noreturn } ; TUNIT: attributes #[[ATTR5]] = { nounwind } -; TUNIT: attributes #[[ATTR6]] = { argmemonly nofree nosync nounwind readonly } +; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind memory(argmem: read) } ; TUNIT: attributes #[[ATTR7]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } +; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } ; TUNIT: attributes #[[ATTR10]] = { naked } ; TUNIT: attributes #[[ATTR11]] = { noinline optnone } -; TUNIT: attributes #[[ATTR12]] = { nofree nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR12]] = { nofree nounwind willreturn memory(read) } ; TUNIT: attributes #[[ATTR13]] = { willreturn } -; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind readonly } -; TUNIT: attributes #[[ATTR15]] = { readonly willreturn } +; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind } ;. 
-; CGSCC: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CGSCC: attributes #[[ATTR3]] = { noreturn } ; CGSCC: attributes #[[ATTR4]] = { nounwind } -; CGSCC: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind readonly } +; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind memory(argmem: read) } ; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR7:[0-9]+]] = { nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; CGSCC: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR7:[0-9]+]] = { nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; CGSCC: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR10]] = { naked } ; CGSCC: attributes #[[ATTR11]] = { noinline optnone } -; CGSCC: attributes #[[ATTR12]] = { nofree nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR12]] = { nofree nounwind willreturn memory(read) } ; CGSCC: attributes #[[ATTR13]] = { willreturn } -; CGSCC: attributes #[[ATTR14]] = { nofree nosync nounwind readonly } -; CGSCC: attributes #[[ATTR15]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR16]] = { readnone willreturn } +; 
CGSCC: attributes #[[ATTR14]] = { nofree nosync nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/norecurse.ll b/llvm/test/Transforms/Attributor/norecurse.ll index 8361c4c1547e3..ba9cbb9dbaac2 100644 --- a/llvm/test/Transforms/Attributor/norecurse.ll +++ b/llvm/test/Transforms/Attributor/norecurse.ll @@ -3,7 +3,7 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define i32 @leaf() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@leaf ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 @@ -12,7 +12,7 @@ define i32 @leaf() { } define i32 @self_rec() { -; CHECK: Function Attrs: nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@self_rec ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i32 4 @@ -22,12 +22,12 @@ define i32 @self_rec() { } define i32 @indirect_rec() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@indirect_rec ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32 undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@indirect_rec ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32 undef @@ -36,12 +36,12 @@ define i32 @indirect_rec() { ret i32 %a } define i32 @indirect_rec2() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@indirect_rec2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: 
ret i32 undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@indirect_rec2 ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32 undef @@ -51,7 +51,7 @@ define i32 @indirect_rec2() { } define i32 @extern() { -; CHECK: Function Attrs: nosync readnone +; CHECK: Function Attrs: nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@extern ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -66,7 +66,7 @@ define i32 @extern() { declare i32 @k() readnone define void @intrinsic(i8* %dest, i8* %src, i32 %len) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@intrinsic ; CHECK-SAME: (i8* nocapture nofree writeonly [[DEST:%.*]], i8* nocapture nofree readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR9:[0-9]+]] @@ -81,7 +81,7 @@ define void @intrinsic(i8* %dest, i8* %src, i32 %len) { declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define internal i32 @called_by_norecurse() { -; CHECK: Function Attrs: norecurse nosync readnone +; CHECK: Function Attrs: norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@called_by_norecurse ; CHECK-SAME: () #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -91,13 +91,13 @@ define internal i32 @called_by_norecurse() { ret i32 %a } define void @m() norecurse { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@m ; TUNIT-SAME: () #[[ATTR6]] { -; TUNIT-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() 
#[[ATTR2]] +; TUNIT-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() #[[ATTR10:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: norecurse nosync readnone +; CGSCC: Function Attrs: norecurse nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@m ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() @@ -108,13 +108,13 @@ define void @m() norecurse { } define internal i32 @called_by_norecurse_indirectly() { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: [[A:%.*]] = call i32 @k() ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: nosync readnone +; CGSCC: Function Attrs: nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[A:%.*]] = call i32 @k() @@ -124,13 +124,13 @@ define internal i32 @called_by_norecurse_indirectly() { ret i32 %a } define internal i32 @o() { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@o ; TUNIT-SAME: () #[[ATTR6]] { -; TUNIT-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() #[[ATTR2]] +; TUNIT-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() #[[ATTR10]] ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: norecurse nosync readnone +; CGSCC: Function Attrs: norecurse nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@o ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() @@ -140,13 +140,13 @@ define internal i32 @o() { ret i32 %a } define i32 @p() norecurse { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@p ; TUNIT-SAME: () #[[ATTR6]] { -; TUNIT-NEXT: [[A:%.*]] = call i32 @o() #[[ATTR2]] +; TUNIT-NEXT: 
[[A:%.*]] = call i32 @o() #[[ATTR10]] ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: norecurse nosync readnone +; CGSCC: Function Attrs: norecurse nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@p ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: [[A:%.*]] = call i32 @o() @@ -157,7 +157,7 @@ define i32 @p() norecurse { } define void @f(i32 %x) { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f ; TUNIT-SAME: (i32 [[X:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -170,7 +170,7 @@ define void @f(i32 %x) { ; TUNIT: if.end: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f ; CGSCC-SAME: (i32 [[X:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -198,7 +198,7 @@ if.end: } define void @g() norecurse { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@g ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -241,7 +241,7 @@ define i32 @eval_func2(i32 (i32)* , i32) local_unnamed_addr null_pointer_is_vali ; Call an unknown function in a dead block. declare void @unknown() define i32 @call_unknown_in_dead_block() local_unnamed_addr { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@call_unknown_in_dead_block ; CHECK-SAME: () local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: ret i32 0 @@ -307,14 +307,26 @@ f: } ;. 
-; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR2]] = { nosync readnone } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { readnone } -; CHECK: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CHECK: attributes #[[ATTR5:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR6]] = { norecurse nosync readnone } -; CHECK: attributes #[[ATTR7]] = { null_pointer_is_valid } -; CHECK: attributes #[[ATTR8:[0-9]+]] = { norecurse } -; CHECK: attributes #[[ATTR9]] = { willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nosync memory(none) } +; TUNIT: attributes #[[ATTR3:[0-9]+]] = { memory(none) } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR6]] = { norecurse nosync memory(none) } +; TUNIT: attributes #[[ATTR7]] = { null_pointer_is_valid } +; TUNIT: attributes #[[ATTR8]] = { norecurse } +; TUNIT: attributes #[[ATTR9]] = { willreturn } +; TUNIT: attributes #[[ATTR10]] = { nosync } +;. 
+; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nosync memory(none) } +; CGSCC: attributes #[[ATTR3:[0-9]+]] = { memory(none) } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR6]] = { norecurse nosync memory(none) } +; CGSCC: attributes #[[ATTR7]] = { null_pointer_is_valid } +; CGSCC: attributes #[[ATTR8]] = { norecurse } +; CGSCC: attributes #[[ATTR9]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/noreturn.ll b/llvm/test/Transforms/Attributor/noreturn.ll index 8d1d275c31f99..d3ef7f354d36c 100644 --- a/llvm/test/Transforms/Attributor/noreturn.ll +++ b/llvm/test/Transforms/Attributor/noreturn.ll @@ -15,7 +15,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; } ; define void @srec0() #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@srec0 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -34,7 +34,7 @@ entry: ; } ; define i32 @srec16(i32 %a) #0 { -; CHECK: Function Attrs: nofree noinline noreturn nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline noreturn nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@srec16 ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -73,7 +73,7 @@ exit: ; } ; define i32 @endless_loop(i32 %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable +; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable ; 
CHECK-LABEL: define {{[^@]+}}@endless_loop ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: @@ -98,7 +98,7 @@ while.body: ; preds = %entry, %while.body ; ; FIXME: no-return missing (D65243 should fix this) define i32 @dead_return(i32 %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable +; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@dead_return ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -126,7 +126,7 @@ return: ; No predecessors! ; } ; define i32 @multiple_noreturn_calls(i32 %a) #0 { -; TUNIT: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse noreturn nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@multiple_noreturn_calls ; TUNIT-SAME: (i32 [[A:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -139,7 +139,7 @@ define i32 @multiple_noreturn_calls(i32 %a) #0 { ; TUNIT: cond.end: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree noinline noreturn nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline noreturn nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@multiple_noreturn_calls ; CGSCC-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -174,7 +174,7 @@ cond.end: ; preds = %cond.false, %cond.t ; FIXME: we should derive "UB" as an argument and report it to the user on request. 
define i32 @endless_loop_but_willreturn() willreturn { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@endless_loop_but_willreturn ; TUNIT-SAME: () #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -182,7 +182,7 @@ define i32 @endless_loop_but_willreturn() willreturn { ; TUNIT: while.body: ; TUNIT-NEXT: br label [[WHILE_BODY]] ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@endless_loop_but_willreturn ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -199,13 +199,13 @@ while.body: ; preds = %entry, %while.body ; TEST 6b: willreturn means *not* no-return or UB define i32 @UB_and_willreturn() willreturn { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@UB_and_willreturn ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@UB_and_willreturn ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: entry: @@ -217,14 +217,14 @@ entry: attributes #0 = { noinline nounwind uwtable } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse noreturn nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR4]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR3]] = { nofree noinline norecurse noreturn nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable } -; CGSCC: attributes #[[ATTR3]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/noreturn_async.ll b/llvm/test/Transforms/Attributor/noreturn_async.ll index cae672f644a17..859c8f69b0dde 100644 --- a/llvm/test/Transforms/Attributor/noreturn_async.ll +++ b/llvm/test/Transforms/Attributor/noreturn_async.ll @@ -149,6 +149,6 @@ declare dso_local i32 @printf(i8* %_Format, ...) declare i32 @llvm.eh.exceptioncode(token) ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind readnone } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind memory(none) } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nosync nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/noreturn_sync.ll b/llvm/test/Transforms/Attributor/noreturn_sync.ll index 7c6c7a8272523..681dbf5980c7a 100644 --- a/llvm/test/Transforms/Attributor/noreturn_sync.ll +++ b/llvm/test/Transforms/Attributor/noreturn_sync.ll @@ -139,6 +139,6 @@ declare dso_local i32 @printf(i8* %_Format, ...) declare i32 @llvm.eh.exceptioncode(token) ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind memory(none) } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/nosync.ll b/llvm/test/Transforms/Attributor/nosync.ll index e3a1f82156da6..324c49fe0151c 100644 --- a/llvm/test/Transforms/Attributor/nosync.ll +++ b/llvm/test/Transforms/Attributor/nosync.ll @@ -30,7 +30,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @[[A:[a-zA-Z0-9_$"\\.-]+]] = common global i32 0, align 4 ;. 
define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp { -; CHECK: Function Attrs: nofree norecurse nosync nounwind optsize readnone ssp willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (%struct.ST* nofree readnone "no-capture-maybe-returned" [[S:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -50,7 +50,7 @@ entry: ; } define i32 @load_monotonic(i32* nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@load_monotonic ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[TMP0]] monotonic, align 4 @@ -68,7 +68,7 @@ define i32 @load_monotonic(i32* nocapture readonly %0) norecurse nounwind uwtabl ; } define void @store_monotonic(i32* nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@store_monotonic ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: store atomic i32 10, i32* [[TMP0]] monotonic, align 4 @@ -86,7 +86,7 @@ define void @store_monotonic(i32* nocapture %0) norecurse nounwind uwtable { ; } define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@load_acquire 
; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[TMP0]] acquire, align 4 @@ -103,7 +103,7 @@ define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable ; } define void @load_release(i32* nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@load_release ; CHECK-SAME: (i32* nocapture nofree noundef writeonly align 4 [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: store atomic volatile i32 10, i32* [[TMP0]] release, align 4 @@ -116,7 +116,7 @@ define void @load_release(i32* nocapture %0) norecurse nounwind uwtable { ; TEST 6 - negative volatile, relaxed atomic define void @load_volatile_release(i32* nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@load_volatile_release ; CHECK-SAME: (i32* nocapture nofree noundef writeonly align 4 [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: store atomic volatile i32 10, i32* [[TMP0]] release, align 4 @@ -133,7 +133,7 @@ define void @load_volatile_release(i32* nocapture %0) norecurse nounwind uwtable ; } define void @volatile_store(i32* %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@volatile_store ; CHECK-SAME: (i32* nofree noundef align 4 [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: store volatile i32 14, i32* [[TMP0]], align 4 @@ -151,7 +151,7 @@ define void @volatile_store(i32* %0) norecurse nounwind uwtable { 
; } define i32 @volatile_load(i32* %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@volatile_load ; CHECK-SAME: (i32* nofree noundef align 4 [[TMP0:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP0]], align 4 @@ -199,14 +199,14 @@ define void @call_might_sync() nounwind uwtable noinline { ; volatile operation in same scc but dead. Call volatile_load defined in TEST 8. define i32 @scc1(i32* %0) noinline nounwind uwtable { -; TUNIT: Function Attrs: argmemonly nofree noinline nounwind uwtable +; TUNIT: Function Attrs: nofree noinline nounwind memory(argmem: readwrite) uwtable ; TUNIT-LABEL: define {{[^@]+}}@scc1 ; TUNIT-SAME: (i32* nofree [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: tail call void @scc2(i32* nofree [[TMP0]]) #[[ATTR19:[0-9]+]] ; TUNIT-NEXT: [[VAL:%.*]] = tail call i32 @volatile_load(i32* nofree align 4 [[TMP0]]) #[[ATTR19]] ; TUNIT-NEXT: ret i32 [[VAL]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nounwind uwtable +; CGSCC: Function Attrs: nofree noinline nounwind memory(argmem: readwrite) uwtable ; CGSCC-LABEL: define {{[^@]+}}@scc1 ; CGSCC-SAME: (i32* nofree [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: tail call void @scc2(i32* nofree [[TMP0]]) #[[ATTR19:[0-9]+]] @@ -219,7 +219,7 @@ define i32 @scc1(i32* %0) noinline nounwind uwtable { } define void @scc2(i32* %0) noinline nounwind uwtable { -; CHECK: Function Attrs: argmemonly nofree noinline nounwind uwtable +; CHECK: Function Attrs: nofree noinline nounwind memory(argmem: readwrite) uwtable ; CHECK-LABEL: define {{[^@]+}}@scc2 ; CHECK-SAME: (i32* nofree [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @scc1(i32* nofree [[TMP0]]) #[[ATTR19:[0-9]+]] @@ -349,7 +349,7 @@ declare void @llvm.memset(i8* %dest, i8 %val, i32 %len, i1 
%isvolatile) ; It is odd to add nocapture but a result of the llvm.memcpy nocapture. ; define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) { -; CHECK: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@memcpy_volatile ; CHECK-SAME: (i8* nocapture nofree writeonly [[PTR1:%.*]], i8* nocapture nofree readonly [[PTR2:%.*]]) #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[PTR1]], i8* noalias nocapture nofree readonly [[PTR2]], i32 noundef 8, i1 noundef true) #[[ATTR20:[0-9]+]] @@ -364,10 +364,10 @@ define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) { ; It is odd to add nocapture but a result of the llvm.memset nocapture. ; define i32 @memset_non_volatile(i8* %ptr1, i8 %val) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@memset_non_volatile ; CHECK-SAME: (i8* nocapture nofree writeonly [[PTR1:%.*]], i8 [[VAL:%.*]]) #[[ATTR11:[0-9]+]] { -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nocapture nofree writeonly [[PTR1]], i8 [[VAL]], i32 noundef 8, i1 noundef false) #[[ATTR21:[0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nocapture nofree writeonly [[PTR1]], i8 [[VAL]], i32 noundef 8, i1 noundef false) #[[ATTR20]] ; CHECK-NEXT: ret i32 4 ; call void @llvm.memset(i8* %ptr1, i8 %val, i32 8, i1 0) @@ -390,7 +390,7 @@ declare void @readnone_test() convergent readnone ; TEST 17 - negative. 
Convergent define void @convergent_readnone(){ -; CHECK: Function Attrs: readnone +; CHECK: Function Attrs: memory(none) ; CHECK-LABEL: define {{[^@]+}}@convergent_readnone ; CHECK-SAME: () #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: call void @readnone_test() @@ -423,7 +423,7 @@ declare float @llvm.cos(float %val) readnone ; TEST 19 - positive, readnone & non-convergent intrinsic. define i32 @cos_test(float %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cos_test ; CHECK-SAME: (float [[X:%.*]]) #[[ATTR15:[0-9]+]] { ; CHECK-NEXT: ret i32 4 @@ -433,37 +433,35 @@ define i32 @cos_test(float %x) { } define float @cos_test2(float %x) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cos_test2 ; CHECK-SAME: (float [[X:%.*]]) #[[ATTR15]] { -; CHECK-NEXT: [[C:%.*]] = call float @llvm.cos.f32(float [[X]]) #[[ATTR22:[0-9]+]] +; CHECK-NEXT: [[C:%.*]] = call float @llvm.cos.f32(float [[X]]) #[[ATTR20]] ; CHECK-NEXT: ret float [[C]] ; %c = call float @llvm.cos(float %x) ret float %c } ;. 
-; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind optsize readnone ssp willreturn uwtable } -; CHECK: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn uwtable } -; CHECK: attributes #[[ATTR2]] = { argmemonly nofree norecurse nounwind willreturn uwtable } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable } ; CHECK: attributes #[[ATTR3]] = { noinline nosync nounwind uwtable } ; CHECK: attributes #[[ATTR4]] = { noinline nounwind uwtable } -; CHECK: attributes #[[ATTR5]] = { argmemonly nofree noinline nounwind uwtable } +; CHECK: attributes #[[ATTR5]] = { nofree noinline nounwind memory(argmem: readwrite) uwtable } ; CHECK: attributes #[[ATTR6]] = { nofree norecurse nounwind willreturn } ; CHECK: attributes #[[ATTR7]] = { nofree norecurse nounwind } ; CHECK: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind willreturn } ; CHECK: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind } -; CHECK: attributes #[[ATTR10]] = { argmemonly nofree norecurse nounwind willreturn } -; CHECK: attributes #[[ATTR11]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR12:[0-9]+]] = { convergent readnone } -; CHECK: attributes #[[ATTR13]] = { readnone } +; CHECK: attributes #[[ATTR10]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CHECK: attributes #[[ATTR12:[0-9]+]] = { convergent memory(none) } +; CHECK: attributes #[[ATTR13]] = { memory(none) } ; CHECK: attributes #[[ATTR14]] = { nounwind } -; CHECK: attributes #[[ATTR15]] = { nofree norecurse nosync nounwind 
readnone willreturn } -; CHECK: attributes #[[ATTR16:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } -; CHECK: attributes #[[ATTR17:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR18:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR15]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR16:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR17:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CHECK: attributes #[[ATTR18:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR19]] = { nofree nounwind } ; CHECK: attributes #[[ATTR20]] = { willreturn } -; CHECK: attributes #[[ATTR21]] = { willreturn writeonly } -; CHECK: attributes #[[ATTR22]] = { readnone willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/nounwind.ll b/llvm/test/Transforms/Attributor/nounwind.ll index 1b4b4d19bdb91..0e1002535dd38 100644 --- a/llvm/test/Transforms/Attributor/nounwind.ll +++ b/llvm/test/Transforms/Attributor/nounwind.ll @@ -4,7 +4,7 @@ ; TEST 1 define i32 @foo1() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo1 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 @@ -14,12 +14,12 @@ define i32 @foo1() { ; TEST 2 define i32 @scc1_foo() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@scc1_foo ; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@scc1_foo ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32 1 @@ -31,12 +31,12 @@ define i32 @scc1_foo() { ; TEST 3 define i32 @scc1_bar() { -; TUNIT: Function Attrs: nofree nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@scc1_bar ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@scc1_bar ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32 1 @@ -145,8 +145,8 @@ declare i8* @__cxa_begin_catch(i8*) declare void @__cxa_end_catch() ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/openmp_parallel.ll b/llvm/test/Transforms/Attributor/openmp_parallel.ll index 1d0c3c880c891..01de35d791721 100644 --- a/llvm/test/Transforms/Attributor/openmp_parallel.ll +++ b/llvm/test/Transforms/Attributor/openmp_parallel.ll @@ -69,7 +69,7 @@ define internal void @.omp_outlined.(i32* noalias nocapture readonly %.global_ti ; TUNIT-NEXT: br label [[OMP_PRECOND_THEN:%.*]] ; TUNIT: omp.precond.then: ; TUNIT-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -; TUNIT-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP0]]) +; TUNIT-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP0]]) #[[ATTR3:[0-9]+]] ; TUNIT-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* ; TUNIT-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP1]]) @@ -129,7 +129,7 @@ define internal void @.omp_outlined.(i32* noalias nocapture readonly %.global_ti ; CGSCC-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] ; CGSCC: omp.precond.then: ; CGSCC-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR3:[0-9]+]] ; CGSCC-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 ; CGSCC-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* ; CGSCC-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[TMP2]]) @@ -264,7 +264,8 @@ attributes #2 = { nounwind } ;. 
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind uwtable } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline nofree norecurse nounwind uwtable } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { memory(readwrite) } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{!2} diff --git a/llvm/test/Transforms/Attributor/pointer-info.ll b/llvm/test/Transforms/Attributor/pointer-info.ll index 5a771dcca7ec5..0b947919ec599 100644 --- a/llvm/test/Transforms/Attributor/pointer-info.ll +++ b/llvm/test/Transforms/Attributor/pointer-info.ll @@ -6,7 +6,7 @@ %struct.test.a = type { %struct.test.b, i32, i8*} define void @foo(i8* %ptr) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (i8* nocapture nofree readnone [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -17,7 +17,7 @@ define void @foo(i8* %ptr) { ; TUNIT-NEXT: tail call void @bar(%struct.test.a* noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_TEST_A]]) align 8 dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -41,7 +41,7 @@ call.br: } define void @bar(%struct.test.a* noundef byval(%struct.test.a) align 8 %dev) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: 
write) ; CHECK-LABEL: define {{[^@]+}}@bar ; CHECK-SAME: (%struct.test.a* noalias nocapture nofree noundef nonnull writeonly byval([[STRUCT_TEST_A:%.*]]) align 8 dereferenceable(24) [[DEV:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A]], %struct.test.a* [[DEV]], i64 0, i32 0 @@ -55,11 +55,11 @@ define void @bar(%struct.test.a* noundef byval(%struct.test.a) align 8 %dev) { ret void } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR2]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/potential.ll b/llvm/test/Transforms/Attributor/potential.ll index 8e8c993fa637b..977eb29f02bff 100644 --- a/llvm/test/Transforms/Attributor/potential.ll +++ b/llvm/test/Transforms/Attributor/potential.ll @@ -9,7 +9,7 @@ ; bool potential_test1(bool c) { return iszero(c ? 
1 : -1); } define internal i1 @iszero1(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@iszero1 ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -20,12 +20,12 @@ define internal i1 @iszero1(i32 %c) { } define i1 @potential_test1(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[ARG:%.*]] = select i1 [[C]], i32 -1, i32 1 @@ -47,7 +47,7 @@ define i1 @potential_test1(i1 %c) { ; int potential_test2(int x) { return call_with_two_values(1) + call_with_two_values(-1); } define internal i32 @iszero2(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@iszero2 ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -60,7 +60,7 @@ define internal i32 @iszero2(i32 %c) { } define internal i32 @call_with_two_values(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@call_with_two_values ; TUNIT-SAME: (i32 noundef [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] = call i32 @iszero2(i32 noundef [[C]]) #[[ATTR1:[0-9]+]], !range 
[[RNG0:![0-9]+]] @@ -69,7 +69,7 @@ define internal i32 @call_with_two_values(i32 %c) { ; TUNIT-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@call_with_two_values ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @iszero2(i32 noundef [[C]]) #[[ATTR2]] @@ -86,7 +86,7 @@ define internal i32 @call_with_two_values(i32 %c) { } define i32 @potential_test2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] = call i32 @call_with_two_values(i32 noundef 1) #[[ATTR1]], !range [[RNG1:![0-9]+]] @@ -94,7 +94,7 @@ define i32 @potential_test2(i1 %c) { ; TUNIT-NEXT: [[RET:%.*]] = add i32 [[CSRET1]], [[CSRET2]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @call_with_two_values(i32 noundef 1) #[[ATTR2]] @@ -120,7 +120,7 @@ define i32 @potential_test2(i1 %c) { ; int potential_test3() { return zero_or_one(iszero(0))+zero_or_one(iszero(1)); } define internal i32 @iszero3(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@iszero3 ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -133,7 +133,7 @@ define internal i32 @iszero3(i32 %c) { } define internal 
i32 @less_than_two(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@less_than_two ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 2 @@ -146,12 +146,12 @@ define internal i32 @less_than_two(i32 %c) { } define i32 @potential_test3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test3 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32 2 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test3 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[CMP1:%.*]] = call i32 @iszero3(i32 noundef 0) #[[ATTR2]] @@ -181,7 +181,7 @@ define i32 @potential_test3() { ; int potential_test7(int c) { return return1or3(c) == return3or4(c); } define i32 @potential_test4(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test4 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR1]] @@ -189,7 +189,7 @@ define i32 @potential_test4(i32 %c) { ; TUNIT-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test4 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR2]] @@ -204,7 +204,7 @@ define i32 @potential_test4(i32 %c) { } define i32 
@potential_test5(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test5 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR1]] @@ -213,7 +213,7 @@ define i32 @potential_test5(i32 %c) { ; TUNIT-NEXT: [[RET:%.*]] = zext i1 [[FALSE]] to i32 ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test5 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR2]] @@ -230,14 +230,14 @@ define i32 @potential_test5(i32 %c) { } define i1 @potential_test6(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test6 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR1]] ; TUNIT-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], 3 ; TUNIT-NEXT: ret i1 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test6 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR2]] @@ -250,7 +250,7 @@ define i1 @potential_test6(i32 %c) { } define i1 @potential_test7(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test7 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[CSRET1:%.*]] 
= call i32 @return1or3(i32 [[C]]) #[[ATTR1]] @@ -258,7 +258,7 @@ define i1 @potential_test7(i32 %c) { ; TUNIT-NEXT: [[RET:%.*]] = icmp eq i32 [[CSRET1]], [[CSRET2]] ; TUNIT-NEXT: ret i1 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test7 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @return1or3(i32 [[C]]) #[[ATTR2]] @@ -273,7 +273,7 @@ define i1 @potential_test7(i32 %c) { } define internal i32 @return1or3(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return1or3 ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -286,7 +286,7 @@ define internal i32 @return1or3(i32 %c) { } define internal i32 @return2or4(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return2or4 ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -299,7 +299,7 @@ define internal i32 @return2or4(i32 %c) { } define internal i32 @return3or4(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return3or4 ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 0 @@ -316,7 +316,7 @@ define internal i32 @return3or4(i32 %c) { ; propagate argument to callsite argument define internal i1 @cmp_with_four(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cmp_with_four ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], 4 @@ -327,7 +327,7 @@ define internal i1 @cmp_with_four(i32 %c) { } define internal i1 @wrapper(i32 %c) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@wrapper ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i1 @cmp_with_four(i32 noundef [[C]]) #[[ATTR2]] @@ -338,12 +338,12 @@ define internal i1 @wrapper(i32 %c) { } define i1 @potential_test8() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test8 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test8 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[RES1:%.*]] = call i1 @wrapper(i32 noundef 1) #[[ATTR2]] @@ -362,7 +362,7 @@ define i1 @potential_test8() { } define i1 @potential_test9() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test9 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -405,7 +405,7 @@ end: ; and returned value of @potential_test10 can be simplified to 0(false) define internal i32 @may_return_undef(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@may_return_undef ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: switch 
i32 [[C]], label [[OTHERWISE:%.*]] [ @@ -430,14 +430,14 @@ otherwise: } define i1 @potential_test10(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test10 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[RET:%.*]] = call i32 @may_return_undef(i32 [[C]]) #[[ATTR1]] ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[RET]], 0 ; TUNIT-NEXT: ret i1 [[CMP]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test10 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i32 @may_return_undef(i32 [[C]]) #[[ATTR2]] @@ -450,7 +450,7 @@ define i1 @potential_test10(i32 %c) { } define i32 @optimize_undef_1(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@optimize_undef_1 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -469,7 +469,7 @@ f: } define i32 @optimize_undef_2(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@optimize_undef_2 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -488,7 +488,7 @@ f: } define i32 @optimize_undef_3(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@optimize_undef_3 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ 
-511,7 +511,7 @@ f: ; FIXME: returned value can be simplified to 0 define i32 @potential_test11(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test11 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[ZERO1:%.*]] = call i32 @optimize_undef_1(i1 [[C]]) #[[ATTR1]], !range [[RNG0]] @@ -521,7 +521,7 @@ define i32 @potential_test11(i1 %c) { ; TUNIT-NEXT: [[ACC2:%.*]] = add i32 [[ACC1]], [[ZERO3]] ; TUNIT-NEXT: ret i32 [[ACC2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test11 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[ZERO1:%.*]] = call i32 @optimize_undef_1(i1 [[C]]) #[[ATTR2]] @@ -540,7 +540,7 @@ define i32 @potential_test11(i1 %c) { } define i32 @optimize_poison_1(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@optimize_poison_1 ; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -560,12 +560,12 @@ f: ; FIXME: returned value can be simplified to 0 define i32 @potential_test12(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test12 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test12 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[ZERO:%.*]] = call i32 
@optimize_poison_1(i1 [[C]]) #[[ATTR2]] @@ -581,7 +581,7 @@ define i32 @potential_test12(i1 %c) { ; However, we should not simplify `and i32 %c, 3` to `%c` define internal i32 @potential_test13_callee(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test13_callee ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = and i32 [[C]], 3 @@ -592,13 +592,13 @@ define internal i32 @potential_test13_callee(i32 %c) { } define i32 @potential_test13_caller1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test13_caller1 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 0) #[[ATTR1]], !range [[RNG0]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test13_caller1 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 0) #[[ATTR2]] @@ -609,13 +609,13 @@ define i32 @potential_test13_caller1() { } define i32 @potential_test13_caller2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test13_caller2 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 1) #[[ATTR1]], !range [[RNG0]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define 
{{[^@]+}}@potential_test13_caller2 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 1) #[[ATTR2]] @@ -626,13 +626,13 @@ define i32 @potential_test13_caller2() { } define i32 @potential_test13_caller3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@potential_test13_caller3 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 undef) #[[ATTR1]], !range [[RNG0]] ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@potential_test13_caller3 ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: [[RET:%.*]] = call i32 @potential_test13_callee(i32 undef) #[[ATTR2]] @@ -643,7 +643,7 @@ define i32 @potential_test13_caller3() { } define i1 @potential_test14(i1 %c0, i1 %c1, i1 %c2, i1 %c3) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test14 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X0:%.*]] = select i1 [[C0]], i32 0, i32 1 @@ -662,7 +662,7 @@ define i1 @potential_test14(i1 %c0, i1 %c1, i1 %c2, i1 %c3) { } define i1 @potential_test15(i1 %c0, i1 %c1) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test15 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X0:%.*]] = select i1 [[C0]], i32 0, i32 1 @@ -677,7 +677,7 @@ define i1 @potential_test15(i1 %c0, i1 %c1) { } define i1 @potential_test16(i1 %c0, i1 
%c1) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@potential_test16 ; CHECK-SAME: (i1 [[C0:%.*]], i1 [[C1:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[X1:%.*]] = select i1 [[C1]], i32 0, i32 1 @@ -691,12 +691,12 @@ define i1 @potential_test16(i1 %c0, i1 %c1) { } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { willreturn } ;. ; TUNIT: [[RNG0]] = !{i32 0, i32 2} ; TUNIT: [[RNG1]] = !{i32 0, i32 3} diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll index f94c986e9e46d..b9d0b6d0c99fb 100644 --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -5,7 +5,7 @@ ; FIXME: CGSCC is not looking at callees and calleers even though it could be allowed. 
define i32 @test0(i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test0 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = load i32, i32* [[P]], align 4, !range [[RNG0:![0-9]+]] @@ -16,13 +16,13 @@ define i32 @test0(i32* %p) { } define i32 @test0-range-check(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test0-range-check ; TUNIT-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]] ; TUNIT-NEXT: ret i32 [[A]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test0-range-check ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR5:[0-9]+]] @@ -269,7 +269,7 @@ define void @test0-icmp-check(i32* %p){ ret void } define i32 @test1(i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test1 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LOAD_10_100:%.*]] = load i32, i32* [[P]], align 4, !range 
[[RNG1:![0-9]+]] @@ -285,14 +285,14 @@ define i32 @test1(i32* %p) { define i1 @test1-check(i32* %p) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test1-check ; TUNIT-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]] ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[RES]], 500 ; TUNIT-NEXT: ret i1 [[CMP]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test1-check ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR5]] @@ -317,7 +317,7 @@ define i1 @test1-check(i32* %p) { ; } define i32 @test2(i32* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -334,7 +334,7 @@ entry: } define i32 @test2_check(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test2_check ; TUNIT-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -349,7 +349,7 @@ define i32 @test2_check(i32* %p) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = 
phi i32 [ 2, [[IF_THEN]] ], [ 3, [[IF_END]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test2_check ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -408,7 +408,7 @@ return: ; preds = %if.end, %if.then declare dso_local void @unkown() define internal i32 @r1(i32) local_unnamed_addr { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@r1 ; TUNIT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: br label [[TMP4:%.*]] @@ -427,7 +427,7 @@ define internal i32 @r1(i32) local_unnamed_addr { ; TUNIT-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 100 ; TUNIT-NEXT: br i1 [[TMP9]], label [[TMP1:%.*]], label [[TMP4]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@r1 ; CGSCC-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: br label [[TMP4:%.*]] @@ -467,7 +467,7 @@ f: define void @f1(i32){ ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: (i32 [[TMP0:%.*]]) { -; TUNIT-NEXT: [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR3]] ; TUNIT-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 15 ; TUNIT-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; TUNIT: 4: @@ -478,7 +478,7 @@ define void @f1(i32){ ; ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: (i32 [[TMP0:%.*]]) { -; CGSCC-NEXT: [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR5]] ; CGSCC-NEXT: [[TMP3:%.*]] = icmp sgt i32 
[[TMP2]], 15 ; CGSCC-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]] ; CGSCC: 4: @@ -510,7 +510,7 @@ define void @f1(i32){ ; } ; } define dso_local i32 @test4-f1(i32 %u) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4-f1 ; TUNIT-SAME: (i32 [[U:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -522,7 +522,7 @@ define dso_local i32 @test4-f1(i32 %u) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[U]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4-f1 ; CGSCC-SAME: (i32 [[U:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -549,18 +549,18 @@ return: ; preds = %entry, %if.then define dso_local i32 @test4-g1(i32 %u) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4-g1 ; TUNIT-SAME: (i32 [[U:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR4]] +; TUNIT-NEXT: [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR3]] ; TUNIT-NEXT: ret i32 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4-g1 ; CGSCC-SAME: (i32 [[U:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR6]] +; CGSCC-NEXT: [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[CALL]] ; ; FIXME: %call should have range [0, inf] @@ -579,7 +579,7 @@ entry: ; } ; } define dso_local i32 
@test4-f2(i32 %u) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4-f2 ; TUNIT-SAME: (i32 [[U:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -594,7 +594,7 @@ define dso_local i32 @test4-f2(i32 %u) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[IF_ELSE]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4-f2 ; CGSCC-SAME: (i32 [[U:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -627,18 +627,18 @@ return: ; preds = %if.else, %if.then define dso_local i32 @test4-g2(i32 %u) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test4-g2 ; TUNIT-SAME: (i32 [[U:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR4]], !range [[RNG3:![0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR3]], !range [[RNG3:![0-9]+]] ; TUNIT-NEXT: ret i32 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test4-g2 ; CGSCC-SAME: (i32 [[U:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR6]] +; CGSCC-NEXT: [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[CALL]] ; entry: @@ -718,7 +718,7 @@ declare dso_local i32 @foo(i32) ; FIXME: All but the return is not needed anymore define dso_local zeroext i1 @phi(i32 %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@phi ; TUNIT-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: bb: @@ -750,7 +750,7 @@ define dso_local zeroext i1 @phi(i32 %arg) { ; TUNIT-NEXT: [[DOT0:%.*]] = phi i1 [ true, [[BB11]] ], [ false, [[BB12]] ] ; TUNIT-NEXT: ret i1 [[DOT0]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@phi ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: bb: @@ -822,7 +822,7 @@ bb13: ; preds = %bb12, %bb11 } define dso_local i1 @select(i32 %a) local_unnamed_addr #0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@select ; TUNIT-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -834,7 +834,7 @@ define dso_local i1 @select(i32 %a) local_unnamed_addr #0 { ; TUNIT-NEXT: [[CMP6:%.*]] = icmp eq i32 [[Y_0]], 5 ; TUNIT-NEXT: ret i1 [[CMP6]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select ; CGSCC-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -857,7 +857,7 @@ entry: } define dso_local i32 @select_zext(i32 %a) local_unnamed_addr #0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@select_zext ; TUNIT-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -870,7 +870,7 @@ define dso_local i32 @select_zext(i32 %a) local_unnamed_addr #0 { ; TUNIT-NEXT: [[DOT13:%.*]] = zext i1 [[CMP6]] to i32 ; 
TUNIT-NEXT: ret i32 [[DOT13]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select_zext ; CGSCC-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -895,7 +895,7 @@ entry: } define dso_local i64 @select_int2ptr_bitcast_ptr2int(i32 %a) local_unnamed_addr #0 { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int ; TUNIT-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -910,7 +910,7 @@ define dso_local i64 @select_int2ptr_bitcast_ptr2int(i32 %a) local_unnamed_addr ; TUNIT-NEXT: [[P2I:%.*]] = ptrtoint i32* [[BC]] to i64 ; TUNIT-NEXT: ret i64 [[P2I]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int ; CGSCC-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -941,14 +941,14 @@ entry: ; } define i1 @f_fcmp(float %a, float %b) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@f_fcmp ; TUNIT-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = fcmp uge float [[A]], [[B]] ; TUNIT-NEXT: [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false ; TUNIT-NEXT: ret i1 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@f_fcmp ; CGSCC-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: 
[[R:%.*]] = fcmp uge float [[A]], [[B]] @@ -960,14 +960,14 @@ define i1 @f_fcmp(float %a, float %b) { ret i1 %s } define i1 @d_fcmp(double %a, double %b) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@d_fcmp ; TUNIT-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = fcmp oeq double [[A]], [[B]] ; TUNIT-NEXT: [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false ; TUNIT-NEXT: ret i1 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@d_fcmp ; CGSCC-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = fcmp oeq double [[A]], [[B]] @@ -979,14 +979,14 @@ define i1 @d_fcmp(double %a, double %b) { ret i1 %s } define i1 @dp_icmp(double* %a, double* %b) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@dp_icmp ; TUNIT-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = icmp sge double* [[A]], [[B]] ; TUNIT-NEXT: [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false ; TUNIT-NEXT: ret i1 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@dp_icmp ; CGSCC-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = icmp sge double* [[A]], [[B]] @@ -998,14 +998,14 @@ define i1 @dp_icmp(double* %a, double* %b) { ret i1 %s } define i1 @ip_icmp(i8* %a, i8* %b) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ip_icmp ; TUNIT-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = icmp ult i8* [[A]], [[B]] ; TUNIT-NEXT: [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false ; TUNIT-NEXT: ret i1 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ip_icmp ; CGSCC-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = icmp ult i8* [[A]], [[B]] @@ -1017,25 +1017,25 @@ define i1 @ip_icmp(i8* %a, i8* %b) { ret i1 %s } define i1 @fcmp_caller(float %fa, float %fb, double %da, double %db, double* %dpa, double* %dpb, i8* %ipa, i8* %ipb) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@fcmp_caller ; TUNIT-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR4]] -; TUNIT-NEXT: [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR4]] -; TUNIT-NEXT: [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR4]] -; TUNIT-NEXT: [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR4]] +; TUNIT-NEXT: [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR3]] +; TUNIT-NEXT: [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR3]] +; TUNIT-NEXT: [[R3:%.*]] = call i1 
@dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR3]] +; TUNIT-NEXT: [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR3]] ; TUNIT-NEXT: [[O1:%.*]] = or i1 [[R1]], [[R2]] ; TUNIT-NEXT: [[O2:%.*]] = or i1 [[R3]], [[R4]] ; TUNIT-NEXT: [[O3:%.*]] = or i1 [[O1]], [[O2]] ; TUNIT-NEXT: ret i1 [[O3]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@fcmp_caller ; CGSCC-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR6]] -; CGSCC-NEXT: [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR6]] -; CGSCC-NEXT: [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR6]] -; CGSCC-NEXT: [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR6]] +; CGSCC-NEXT: [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR5]] +; CGSCC-NEXT: [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR5]] +; CGSCC-NEXT: [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR5]] +; CGSCC-NEXT: [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR5]] ; CGSCC-NEXT: [[O1:%.*]] = or i1 [[R1]], [[R2]] ; CGSCC-NEXT: [[O2:%.*]] = or i1 [[R3]], [[R4]] ; CGSCC-NEXT: [[O3:%.*]] = or i1 [[O1]], [[O2]] @@ -1052,12 +1052,12 @@ define i1 @fcmp_caller(float %fa, float %fb, double %da, double %db, double* %dp } define i8 @ret_two() { -; TUNIT: Function 
Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ret_two ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 2 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret_two ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: ret i8 2 @@ -1065,12 +1065,12 @@ define i8 @ret_two() { ret i8 2 } define i8 @ret_undef() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ret_undef ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 undef ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret_undef ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: ret i8 undef @@ -1080,15 +1080,15 @@ define i8 @ret_undef() { ; Verify we collapse undef to a value and return something non-undef here. 
define i8 @undef_collapse_1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@undef_collapse_1 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_collapse_1 ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C:%.*]] = call i8 @ret_undef() #[[ATTR6]] +; CGSCC-NEXT: [[C:%.*]] = call i8 @ret_undef() #[[ATTR5]] ; CGSCC-NEXT: [[S:%.*]] = shl i8 [[C]], 2 ; CGSCC-NEXT: ret i8 [[S]] ; @@ -1099,15 +1099,15 @@ define i8 @undef_collapse_1() { ; Verify we collapse undef to a value and return something non-undef here. define i8 @undef_collapse_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@undef_collapse_2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_collapse_2 ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C:%.*]] = call i8 @ret_two() #[[ATTR6]] +; CGSCC-NEXT: [[C:%.*]] = call i8 @ret_two() #[[ATTR5]] ; CGSCC-NEXT: [[S:%.*]] = shl i8 undef, [[C]] ; CGSCC-NEXT: ret i8 [[S]] ; @@ -1118,16 +1118,16 @@ define i8 @undef_collapse_2() { define i8 @undef_collapse_caller() { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@undef_collapse_caller ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: 
Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_collapse_caller ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C1:%.*]] = call i8 @undef_collapse_1() #[[ATTR6]] -; CGSCC-NEXT: [[C2:%.*]] = call i8 @undef_collapse_2() #[[ATTR6]] +; CGSCC-NEXT: [[C1:%.*]] = call i8 @undef_collapse_1() #[[ATTR5]] +; CGSCC-NEXT: [[C2:%.*]] = call i8 @undef_collapse_2() #[[ATTR5]] ; CGSCC-NEXT: [[A:%.*]] = add i8 [[C1]], [[C2]] ; CGSCC-NEXT: ret i8 [[A]] ; @@ -1138,13 +1138,13 @@ define i8 @undef_collapse_caller() { } define i32 @ret1or2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ret1or2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[S:%.*]] = select i1 [[C]], i32 1, i32 2 ; TUNIT-NEXT: ret i32 [[S]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret1or2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[S:%.*]] = select i1 [[C]], i32 1, i32 2 @@ -1155,11 +1155,11 @@ define i32 @ret1or2(i1 %c) { } define i1 @callee_range_1(i1 %c1, i1 %c2, i1 %c3) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callee_range_1 ; TUNIT-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR4]] -; TUNIT-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR4]] +; TUNIT-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR3]] +; TUNIT-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR3]] ; TUNIT-NEXT: [[INDIRECTION:%.*]] = select i1 [[C3]], i32 [[R1]], i32 [[R2]] ; TUNIT-NEXT: [[A:%.*]] = add i32 [[R1]], 
[[INDIRECTION]] ; TUNIT-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 4 @@ -1167,11 +1167,11 @@ define i1 @callee_range_1(i1 %c1, i1 %c2, i1 %c3) { ; TUNIT-NEXT: [[F:%.*]] = and i1 [[I1]], [[I2]] ; TUNIT-NEXT: ret i1 [[F]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callee_range_1 ; CGSCC-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR6]] -; CGSCC-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR6]] +; CGSCC-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR5]] +; CGSCC-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR5]] ; CGSCC-NEXT: [[INDIRECTION:%.*]] = select i1 [[C3]], i32 [[R1]], i32 [[R2]] ; CGSCC-NEXT: [[A:%.*]] = add i32 [[R1]], [[INDIRECTION]] ; CGSCC-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 4 @@ -1191,22 +1191,22 @@ define i1 @callee_range_1(i1 %c1, i1 %c2, i1 %c3) { define i1 @callee_range_2(i1 %c1, i1 %c2) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@callee_range_2 ; TUNIT-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR4]] -; TUNIT-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR4]] +; TUNIT-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR3]] +; TUNIT-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR3]] ; TUNIT-NEXT: [[A:%.*]] = add i32 [[R1]], [[R2]] ; TUNIT-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 3 ; TUNIT-NEXT: [[I2:%.*]] = icmp sge i32 [[A]], 2 ; TUNIT-NEXT: [[F:%.*]] = and i1 [[I1]], [[I2]] ; TUNIT-NEXT: ret i1 [[F]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define 
{{[^@]+}}@callee_range_2 ; CGSCC-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR6]] -; CGSCC-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR6]] +; CGSCC-NEXT: [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR5]] +; CGSCC-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR5]] ; CGSCC-NEXT: [[A:%.*]] = add i32 [[R1]], [[R2]] ; CGSCC-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 3 ; CGSCC-NEXT: [[I2:%.*]] = icmp sge i32 [[A]], 2 @@ -1224,12 +1224,12 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) { define i32 @ret100() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ret100 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32 100 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret100 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: ret i32 100 @@ -1239,7 +1239,7 @@ define i32 @ret100() { define i1 @ctx_adjustment(i32 %V) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ctx_adjustment ; TUNIT-SAME: (i32 [[V:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[C1:%.*]] = icmp sge i32 [[V]], 100 @@ -1253,7 +1253,7 @@ define i1 @ctx_adjustment(i32 %V) { ; TUNIT-NEXT: [[C2:%.*]] = icmp sge i32 [[PHI]], 100 ; TUNIT-NEXT: ret i1 [[C2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ctx_adjustment ; CGSCC-SAME: (i32 [[V:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[C1:%.*]] = icmp sge i32 [[V]], 100 @@ -1261,7 +1261,7 @@ define i1 @ctx_adjustment(i32 %V) { ; CGSCC: if.true: ; 
CGSCC-NEXT: br label [[END:%.*]] ; CGSCC: if.false: -; CGSCC-NEXT: [[CALL:%.*]] = call i32 @ret100() #[[ATTR6]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32 @ret100() #[[ATTR5]] ; CGSCC-NEXT: br label [[END]] ; CGSCC: end: ; CGSCC-NEXT: [[PHI:%.*]] = phi i32 [ [[V]], [[IF_TRUE]] ], [ [[CALL]], [[IF_FALSE]] ] @@ -1283,13 +1283,13 @@ end: define i32 @func(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@func ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[RET:%.*]] = select i1 [[C]], i32 0, i32 1 ; TUNIT-NEXT: ret i32 [[RET]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@func ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[RET:%.*]] = select i1 [[C]], i32 0, i32 1 @@ -1300,28 +1300,28 @@ define i32 @func(i1 %c) { } define i32 @simplify_callsite_argument(i1 %d) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@simplify_callsite_argument ; TUNIT-SAME: (i1 [[D:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: [[RET1:%.*]] = call i32 @func(i1 noundef [[C]]) #[[ATTR4]] +; TUNIT-NEXT: [[RET1:%.*]] = call i32 @func(i1 noundef [[C]]) #[[ATTR3]] ; TUNIT-NEXT: ret i32 [[RET1]] ; TUNIT: f: -; TUNIT-NEXT: [[RET2:%.*]] = call i32 @func(i1 noundef false) #[[ATTR4]] +; TUNIT-NEXT: [[RET2:%.*]] = call i32 @func(i1 noundef false) #[[ATTR3]] ; TUNIT-NEXT: ret i32 [[RET2]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) 
; CGSCC-LABEL: define {{[^@]+}}@simplify_callsite_argument ; CGSCC-SAME: (i1 [[D:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: [[RET1:%.*]] = call noundef i32 @func(i1 noundef [[C]]) #[[ATTR6]] +; CGSCC-NEXT: [[RET1:%.*]] = call noundef i32 @func(i1 noundef [[C]]) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[RET1]] ; CGSCC: f: -; CGSCC-NEXT: [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR6]] +; CGSCC-NEXT: [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR5]] ; CGSCC-NEXT: ret i32 [[RET2]] ; %c = select i1 %d, i1 true, i1 false @@ -1336,7 +1336,7 @@ f: define internal i32 @less_than_65536(i32 %arg) { ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@less_than_65536 ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[SHRINKED:%.*]] = udiv i32 [[ARG]], 65536 @@ -1347,7 +1347,7 @@ define internal i32 @less_than_65536(i32 %arg) { } define internal i1 @is_less_than_65536(i32 %arg) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@is_less_than_65536 ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp ult i32 [[ARG]], 65536 @@ -1358,18 +1358,18 @@ define internal i1 @is_less_than_65536(i32 %arg) { } define i1 @check_divided_range(i32 %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@check_divided_range ; TUNIT-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function 
Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@check_divided_range ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @less_than_65536(i32 noundef 0) #[[ATTR6]] -; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @less_than_65536(i32 [[ARG]]) #[[ATTR6]] -; CGSCC-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_65536(i32 [[CSRET1]]) #[[ATTR6]] -; CGSCC-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_65536(i32 [[CSRET2]]) #[[ATTR6]] +; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @less_than_65536(i32 noundef 0) #[[ATTR5]] +; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @less_than_65536(i32 [[ARG]]) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_65536(i32 [[CSRET1]]) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_65536(i32 [[CSRET2]]) #[[ATTR5]] ; CGSCC-NEXT: [[RET:%.*]] = and i1 [[TRUE1]], [[TRUE2]] ; CGSCC-NEXT: ret i1 [[RET]] ; @@ -1383,7 +1383,7 @@ define i1 @check_divided_range(i32 %arg) { define internal i32 @cast_and_return(i1 %c) { ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cast_and_return ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[RET:%.*]] = zext i1 [[C]] to i32 @@ -1394,7 +1394,7 @@ define internal i32 @cast_and_return(i1 %c) { } define internal i1 @is_less_than_3(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@is_less_than_3 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 3 @@ -1405,18 +1405,18 @@ define internal i1 @is_less_than_3(i32 %c) { } define i1 @check_casted_range(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind 
willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@check_casted_range ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@check_casted_range ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @cast_and_return(i1 noundef true) #[[ATTR6]] -; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @cast_and_return(i1 [[C]]) #[[ATTR6]] +; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @cast_and_return(i1 noundef true) #[[ATTR5]] +; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @cast_and_return(i1 [[C]]) #[[ATTR5]] ; CGSCC-NEXT: [[ADD:%.*]] = add i32 [[CSRET1]], [[CSRET2]] -; CGSCC-NEXT: [[RET:%.*]] = call i1 @is_less_than_3(i32 [[ADD]]) #[[ATTR6]] +; CGSCC-NEXT: [[RET:%.*]] = call i1 @is_less_than_3(i32 [[ADD]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[RET]] ; %csret1 = call i32 @cast_and_return(i1 true) @@ -1427,7 +1427,7 @@ define i1 @check_casted_range(i1 %c) { } define internal i32 @less_than_100_1(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@less_than_100_1 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: switch i32 [[C]], label [[OTHERWISE:%.*]] [ @@ -1482,7 +1482,7 @@ otherwise: } define internal i1 @is_less_than_100_1(i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@is_less_than_100_1 ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 100 @@ -1493,16 +1493,16 @@ define internal i1 @is_less_than_100_1(i32 %c) { } define i1 @propagate_range1(i32 %c){ -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@propagate_range1 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@propagate_range1 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CSRET:%.*]] = call i32 @less_than_100_1(i32 [[C]]) #[[ATTR6]] -; CGSCC-NEXT: [[TRUE:%.*]] = call i1 @is_less_than_100_1(i32 noundef [[CSRET]]) #[[ATTR6]] +; CGSCC-NEXT: [[CSRET:%.*]] = call i32 @less_than_100_1(i32 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE:%.*]] = call i1 @is_less_than_100_1(i32 noundef [[CSRET]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[TRUE]] ; %csret = call i32 @less_than_100_1(i32 %c) @@ -1512,7 +1512,7 @@ define i1 @propagate_range1(i32 %c){ define internal i32 @less_than_100_2(i32 %c) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@less_than_100_2 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: switch i32 [[C]], label [[OTHERWISE:%.*]] [ @@ -1541,7 +1541,7 @@ define internal i32 @less_than_100_2(i32 %c) { ; TUNIT: otherwise: ; TUNIT-NEXT: ret i32 99 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@less_than_100_2 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: switch i32 [[C]], label [[OTHERWISE:%.*]] [ @@ -1597,13 +1597,13 @@ otherwise: define internal i1 @is_less_than_100_2(i32 %c) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define 
{{[^@]+}}@is_less_than_100_2 ; TUNIT-SAME: (i32 noundef [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 100 ; TUNIT-NEXT: ret i1 [[CMP]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@is_less_than_100_2 ; CGSCC-SAME: (i32 noundef [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CMP:%.*]] = icmp slt i32 [[C]], 100 @@ -1614,23 +1614,23 @@ define internal i1 @is_less_than_100_2(i32 %c) { } define i1 @propagate_range2(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@propagate_range2 ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { -; TUNIT-NEXT: [[CSRET1:%.*]] = call noundef i32 @less_than_100_2(i32 noundef 0) #[[ATTR4]] -; TUNIT-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET1]]) #[[ATTR4]] -; TUNIT-NEXT: [[CSRET2:%.*]] = call noundef i32 @less_than_100_2(i32 [[C]]) #[[ATTR4]] -; TUNIT-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET2]]) #[[ATTR4]] +; TUNIT-NEXT: [[CSRET1:%.*]] = call noundef i32 @less_than_100_2(i32 noundef 0) #[[ATTR3]] +; TUNIT-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET1]]) #[[ATTR3]] +; TUNIT-NEXT: [[CSRET2:%.*]] = call noundef i32 @less_than_100_2(i32 [[C]]) #[[ATTR3]] +; TUNIT-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET2]]) #[[ATTR3]] ; TUNIT-NEXT: [[TRUE:%.*]] = and i1 [[TRUE1]], [[TRUE2]] ; TUNIT-NEXT: ret i1 [[TRUE]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@propagate_range2 ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @less_than_100_2(i32 noundef 0) #[[ATTR6]] -; 
CGSCC-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET1]]) #[[ATTR6]] -; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @less_than_100_2(i32 [[C]]) #[[ATTR6]] -; CGSCC-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET2]]) #[[ATTR6]] +; CGSCC-NEXT: [[CSRET1:%.*]] = call i32 @less_than_100_2(i32 noundef 0) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE1:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET1]]) #[[ATTR5]] +; CGSCC-NEXT: [[CSRET2:%.*]] = call i32 @less_than_100_2(i32 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[TRUE2:%.*]] = call i1 @is_less_than_100_2(i32 noundef [[CSRET2]]) #[[ATTR5]] ; CGSCC-NEXT: [[TRUE:%.*]] = and i1 [[TRUE1]], [[TRUE2]] ; CGSCC-NEXT: ret i1 [[TRUE]] ; @@ -1643,13 +1643,13 @@ define i1 @propagate_range2(i32 %c) { } define internal i1 @non_zero(i8 %v) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@non_zero ; TUNIT-SAME: (i8 [[V:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[R:%.*]] = icmp ne i8 [[V]], 0 ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@non_zero ; CGSCC-SAME: (i8 [[V:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = icmp ne i8 [[V]], 0 @@ -1661,26 +1661,26 @@ define internal i1 @non_zero(i8 %v) { ; Avoid range metadata for %l below define i1 @context(i8* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@context ; TUNIT-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; TUNIT-NEXT: [[C:%.*]] = icmp slt i8 0, [[L]] ; TUNIT-NEXT: br i1 [[C]], 
label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR4]] +; TUNIT-NEXT: [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR3]] ; TUNIT-NEXT: ret i1 [[R]] ; TUNIT: f: ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@context ; CGSCC-SAME: (i8* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; CGSCC-NEXT: [[C:%.*]] = icmp slt i8 0, [[L]] ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CGSCC: t: -; CGSCC-NEXT: [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; CGSCC: f: ; CGSCC-NEXT: ret i1 false @@ -1759,7 +1759,7 @@ bb3: ; preds = %bb2, %bb1 } define i1 @loop_1(i32 %N) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@loop_1 ; TUNIT-SAME: (i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -1774,7 +1774,7 @@ define i1 @loop_1(i32 %N) { ; TUNIT-NEXT: [[R:%.*]] = icmp sle i32 [[I]], 5 ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone +; CGSCC: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@loop_1 ; CGSCC-SAME: (i32 [[N:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -1810,19 +1810,17 @@ declare void @barney(i32 signext, i32 signext) !0 = !{i32 0, i32 10} !1 = !{i32 10, i32 100} ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readnone } -; CGSCC: attributes #[[ATTR5]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR6]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR5]] = { willreturn } ;. 
; TUNIT: [[RNG0]] = !{i32 0, i32 10} ; TUNIT: [[RNG1]] = !{i32 10, i32 100} diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll index 4599dd2abb16d..e528007cada60 100644 --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -33,7 +33,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@external_ret2_nrw ; TUNIT-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -43,7 +43,7 @@ define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { ; TUNIT-NEXT: [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree align 4 [[R0]], i32* nofree [[W0]]) #[[ATTR3]] ; TUNIT-NEXT: ret i32* [[W0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@external_ret2_nrw ; CGSCC-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -62,7 +62,7 @@ entry: } define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@internal_ret0_nw ; TUNIT-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -87,7 +87,7 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL5]], [[IF_END]] ], [ [[N0]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* 
[[RETVAL_0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@internal_ret0_nw ; CGSCC-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -104,8 +104,8 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { ; CGSCC-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; CGSCC-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 
dereferenceable(4) [[W0]]) #[[ATTR4]] ; CGSCC-NEXT: [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: @@ -139,7 +139,7 @@ return: ; preds = %if.end, %if.then } define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@internal_ret1_rrw ; TUNIT-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree align 4 [[R1:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -167,7 +167,7 @@ define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL8]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@internal_ret1_rrw ; CGSCC-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree align 4 [[R1:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -187,8 +187,8 @@ define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) { ; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL5:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree 
nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] -; CGSCC-NEXT: [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; CGSCC-NEXT: [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4]] +; CGSCC-NEXT: [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR4]] ; CGSCC-NEXT: [[CALL8:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: @@ -225,7 +225,7 @@ return: ; preds = %if.end, %if.then } define i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@external_sink_ret2_nrw ; CHECK-SAME: (i32* nofree [[N0:%.*]], i32* nocapture nofree readonly [[R0:%.*]], i32* nofree returned writeonly "no-capture-maybe-returned" [[W0:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -257,7 +257,7 @@ return: ; preds = %if.end, 
%if.then } define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@internal_ret1_rw ; TUNIT-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -279,7 +279,7 @@ define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32* [ [[CALL4]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* [[RETVAL_0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@internal_ret1_rw ; CGSCC-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -294,7 +294,7 @@ define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) { ; CGSCC-NEXT: store i32 [[TMP1]], i32* [[W0]], align 4 ; CGSCC-NEXT: [[CALL1:%.*]] = call i32* @internal_ret0_nw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] -; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) #[[ATTR3]] +; CGSCC-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) 
#[[ATTR4]] ; CGSCC-NEXT: [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]] ; CGSCC-NEXT: br label [[RETURN]] ; CGSCC: return: @@ -325,7 +325,7 @@ return: ; preds = %if.end, %if.then } define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@external_source_ret2_nrw ; TUNIT-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -333,11 +333,11 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) { ; TUNIT-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR3]] ; TUNIT-NEXT: ret i32* [[W0]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@external_source_ret2_nrw ; CGSCC-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly [[W0]]) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly [[W0]]) #[[ATTR5:[0-9]+]] ; CGSCC-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR3]] ; CGSCC-NEXT: ret i32* [[CALL1]] ; @@ -350,15 +350,16 @@ entry: ; Verify that we see only expected attribute sets, the above lines only check ; for a subset relation. ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR2]] = { argmemonly nofree norecurse nosync nounwind } +; TUNIT: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind } ; CGSCC: attributes #[[ATTR3]] = { nounwind } -; CGSCC: attributes #[[ATTR4]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR4]] = { nounwind memory(readwrite) } +; CGSCC: attributes #[[ATTR5]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll index 2ef52f86827d3..d3936a18a3983 100644 --- a/llvm/test/Transforms/Attributor/readattrs.ll +++ b/llvm/test/Transforms/Attributor/readattrs.ll @@ -26,7 +26,7 @@ define void @test1_2(i8* %x1_2, i8* %y1_2, i8* %z1_2) { } define i8* @test2(i8* %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (i8* nofree readnone returned "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: store i32 0, i32* @x, align 4 @@ -37,7 +37,7 @@ define i8* @test2(i8* %p) { } define i1 @test3(i8* %p, i8* %q) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test3 ; CHECK-SAME: (i8* nofree readnone [[P:%.*]], i8* nofree readnone [[Q:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = icmp ult i8* [[P]], [[Q]] @@ -50,10 +50,10 @@ define i1 @test3(i8* %p, i8* %q) { declare void @test4_1(i8* nocapture) readonly define void @test4_2(i8* %p) { -; CHECK: Function Attrs: readonly +; CHECK: Function Attrs: memory(read) ; CHECK-LABEL: define {{[^@]+}}@test4_2 ; CHECK-SAME: (i8* nocapture readonly [[P:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: call void @test4_1(i8* nocapture readonly [[P]]) #[[ATTR2]] +; CHECK-NEXT: call void @test4_1(i8* nocapture readonly [[P]]) ; CHECK-NEXT: ret void ; call void @test4_1(i8* %p) @@ -62,7 +62,7 @@ define void @test4_2(i8* %p) { ; Missed optz'n: we could make %q readnone, but don't break test6! 
define void @test5(i8** %p, i8* %q) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test5 ; CHECK-SAME: (i8** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[P:%.*]], i8* nofree writeonly [[Q:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: store i8* [[Q]], i8** [[P]], align 8 @@ -88,7 +88,7 @@ define void @test6_2(i8** %p, i8* %q) { ; inalloca parameters are always considered written define void @test7_1(i32* inalloca(i32) %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test7_1 ; CHECK-SAME: (i32* nocapture nofree nonnull writeonly inalloca(i32) dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret void @@ -97,7 +97,7 @@ define void @test7_1(i32* inalloca(i32) %a) { } define i32* @test8_1(i32* %p) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test8_1 ; CHECK-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -108,14 +108,14 @@ entry: } define void @test8_2(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test8_2 ; TUNIT-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i32 10, i32* [[P]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define 
{{[^@]+}}@test8_2 ; CGSCC-SAME: (i32* nofree writeonly [[P:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -135,16 +135,16 @@ declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*>, i32, ; CHECK-NOT: readnone ; CHECK-NOT: readonly define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@test9 ; TUNIT-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) #[[ATTR12:[0-9]+]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@test9 ; CGSCC-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) #[[ATTR0]] { -; CGSCC-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef ) #[[ATTR13]] ; CGSCC-NEXT: ret void ; call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1>) @@ -154,16 +154,16 @@ define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) { ; CHECK: declare <4 x i32> @llvm.masked.gather declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) define <4 x i32> @test10(<4 x i32*> %ptrs) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@test10 ; TUNIT-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR6:[0-9]+]] { -; TUNIT-NEXT: 
[[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) #[[ATTR13:[0-9]+]] +; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) #[[ATTR12]] ; TUNIT-NEXT: ret <4 x i32> [[RES]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@test10 ; CGSCC-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR7:[0-9]+]] { -; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) #[[ATTR15:[0-9]+]] +; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef , <4 x i32> undef) #[[ATTR13]] ; CGSCC-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1>, <4 x i32>undef) @@ -173,16 +173,16 @@ define <4 x i32> @test10(<4 x i32*> %ptrs) { ; CHECK: declare <4 x i32> @test11_1 declare <4 x i32> @test11_1(<4 x i32*>) argmemonly nounwind readonly define <4 x i32> @test11_2(<4 x i32*> %ptrs) { -; TUNIT: Function Attrs: argmemonly nounwind readonly +; TUNIT: Function Attrs: nounwind memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@test11_2 ; TUNIT-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR7:[0-9]+]] { -; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @test11_1(<4 x i32*> [[PTRS]]) #[[ATTR11:[0-9]+]] +; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @test11_1(<4 x i32*> [[PTRS]]) #[[ATTR13:[0-9]+]] ; TUNIT-NEXT: ret <4 x i32> [[RES]] ; -; CGSCC: Function Attrs: argmemonly nounwind readonly +; CGSCC: Function Attrs: nounwind memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@test11_2 ; CGSCC-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR8:[0-9]+]] { -; CGSCC-NEXT: [[RES:%.*]] = call <4 x 
i32> @test11_1(<4 x i32*> [[PTRS]]) #[[ATTR12:[0-9]+]] +; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @test11_1(<4 x i32*> [[PTRS]]) #[[ATTR14:[0-9]+]] ; CGSCC-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @test11_1(<4 x i32*> %ptrs) @@ -192,16 +192,16 @@ define <4 x i32> @test11_2(<4 x i32*> %ptrs) { declare <4 x i32> @test12_1(<4 x i32*>) argmemonly nounwind ; CHECK-NOT: readnone define <4 x i32> @test12_2(<4 x i32*> %ptrs) { -; TUNIT: Function Attrs: argmemonly nounwind +; TUNIT: Function Attrs: nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@test12_2 ; TUNIT-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR8:[0-9]+]] { -; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR14:[0-9]+]] +; TUNIT-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR13]] ; TUNIT-NEXT: ret <4 x i32> [[RES]] ; -; CGSCC: Function Attrs: argmemonly nounwind +; CGSCC: Function Attrs: nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@test12_2 ; CGSCC-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR9:[0-9]+]] { -; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR16:[0-9]+]] +; CGSCC-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR14]] ; CGSCC-NEXT: ret <4 x i32> [[RES]] ; %res = call <4 x i32> @test12_1(<4 x i32*> %ptrs) @@ -209,13 +209,13 @@ define <4 x i32> @test12_2(<4 x i32*> %ptrs) { } define i32 @volatile_load(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@volatile_load ; TUNIT-SAME: (i32* nofree noundef align 4 [[P:%.*]]) #[[ATTR9:[0-9]+]] { ; TUNIT-NEXT: [[LOAD:%.*]] = load volatile i32, i32* [[P]], align 4 ; TUNIT-NEXT: ret i32 [[LOAD]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(argmem: readwrite) ; 
CGSCC-LABEL: define {{[^@]+}}@volatile_load ; CGSCC-SAME: (i32* nofree noundef align 4 [[P:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[LOAD:%.*]] = load volatile i32, i32* [[P]], align 4 @@ -271,7 +271,7 @@ define void @unsound_readonly(i8* %ignored, i8* %escaped_then_written) { declare void @escape_i8(i8* %ptr) define void @byval_not_readonly_1(i8* byval(i8) %written) readonly { -; CHECK: Function Attrs: readonly +; CHECK: Function Attrs: memory(read) ; CHECK-LABEL: define {{[^@]+}}@byval_not_readonly_1 ; CHECK-SAME: (i8* noalias nonnull byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: call void @escape_i8(i8* nonnull dereferenceable(1) [[WRITTEN]]) @@ -282,7 +282,7 @@ define void @byval_not_readonly_1(i8* byval(i8) %written) readonly { } define void @byval_not_readonly_2(i8* byval(i8) %written) readonly { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@byval_not_readonly_2 ; CHECK-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: store i8 0, i8* [[WRITTEN]], align 1 @@ -293,13 +293,13 @@ define void @byval_not_readonly_2(i8* byval(i8) %written) readonly { } define void @byval_not_readnone_1(i8* byval(i8) %written) readnone { -; TUNIT: Function Attrs: readnone +; TUNIT: Function Attrs: memory(none) ; TUNIT-LABEL: define {{[^@]+}}@byval_not_readnone_1 ; TUNIT-SAME: (i8* noalias nonnull byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR10:[0-9]+]] { ; TUNIT-NEXT: call void @escape_i8(i8* nonnull dereferenceable(1) [[WRITTEN]]) ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: readnone +; CGSCC: Function Attrs: memory(none) ; CGSCC-LABEL: define {{[^@]+}}@byval_not_readnone_1 ; CGSCC-SAME: (i8* noalias nonnull byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR11:[0-9]+]] { ; CGSCC-NEXT: call void 
@escape_i8(i8* nonnull dereferenceable(1) [[WRITTEN]]) @@ -310,7 +310,7 @@ define void @byval_not_readnone_1(i8* byval(i8) %written) readnone { } define void @byval_not_readnone_2(i8* byval(i8) %written) readnone { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@byval_not_readnone_2 ; CHECK-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: store i8 0, i8* [[WRITTEN]], align 1 @@ -321,7 +321,7 @@ define void @byval_not_readnone_2(i8* byval(i8) %written) readnone { } define void @byval_no_fnarg(i8* byval(i8) %written) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@byval_no_fnarg ; CHECK-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly byval(i8) dereferenceable(1) [[WRITTEN:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: store i8 0, i8* [[WRITTEN]], align 1 @@ -334,16 +334,16 @@ define void @byval_no_fnarg(i8* byval(i8) %written) { define void @testbyval(i8* %read_only) { ; TUNIT-LABEL: define {{[^@]+}}@testbyval ; TUNIT-SAME: (i8* nocapture readonly [[READ_ONLY:%.*]]) { -; TUNIT-NEXT: call void @byval_not_readonly_1(i8* nocapture readonly byval(i8) [[READ_ONLY]]) #[[ATTR2]] +; TUNIT-NEXT: call void @byval_not_readonly_1(i8* nocapture readonly byval(i8) [[READ_ONLY]]) ; TUNIT-NEXT: call void @byval_not_readnone_1(i8* noalias nocapture readnone byval(i8) [[READ_ONLY]]) -; TUNIT-NEXT: call void @byval_no_fnarg(i8* nocapture nofree readonly byval(i8) [[READ_ONLY]]) #[[ATTR15:[0-9]+]] +; TUNIT-NEXT: call void @byval_no_fnarg(i8* nocapture nofree readonly byval(i8) [[READ_ONLY]]) #[[ATTR13]] ; TUNIT-NEXT: ret void ; ; CGSCC-LABEL: define {{[^@]+}}@testbyval ; CGSCC-SAME: (i8* 
nocapture noundef nonnull readonly dereferenceable(1) [[READ_ONLY:%.*]]) { -; CGSCC-NEXT: call void @byval_not_readonly_1(i8* noalias nocapture noundef nonnull readonly byval(i8) dereferenceable(1) [[READ_ONLY]]) #[[ATTR2]] +; CGSCC-NEXT: call void @byval_not_readonly_1(i8* noalias nocapture noundef nonnull readonly byval(i8) dereferenceable(1) [[READ_ONLY]]) ; CGSCC-NEXT: call void @byval_not_readnone_1(i8* noalias nocapture noundef nonnull readnone byval(i8) dereferenceable(1) [[READ_ONLY]]) -; CGSCC-NEXT: call void @byval_no_fnarg(i8* noalias nocapture nofree noundef nonnull readnone byval(i8) dereferenceable(1) [[READ_ONLY]]) #[[ATTR17:[0-9]+]] +; CGSCC-NEXT: call void @byval_no_fnarg(i8* noalias nocapture nofree noundef nonnull readnone byval(i8) dereferenceable(1) [[READ_ONLY]]) #[[ATTR14]] ; CGSCC-NEXT: ret void ; call void @byval_not_readonly_1(i8* byval(i8) %read_only) @@ -360,18 +360,18 @@ declare i8 @maybe_returned_val(i8* %ptr) readonly nounwind declare void @val_use(i8 %ptr) readonly nounwind define void @ptr_uses(i8* %ptr) { -; TUNIT: Function Attrs: nounwind readonly +; TUNIT: Function Attrs: nounwind memory(read) ; TUNIT-LABEL: define {{[^@]+}}@ptr_uses -; TUNIT-SAME: (i8* nocapture readonly [[PTR:%.*]]) #[[ATTR11]] { -; TUNIT-NEXT: [[CALL_PTR:%.*]] = call i8* @maybe_returned_ptr(i8* readonly [[PTR]]) #[[ATTR11]] -; TUNIT-NEXT: [[CALL_VAL:%.*]] = call i8 @maybe_returned_val(i8* readonly [[CALL_PTR]]) #[[ATTR11]] +; TUNIT-SAME: (i8* nocapture readonly [[PTR:%.*]]) #[[ATTR11:[0-9]+]] { +; TUNIT-NEXT: [[CALL_PTR:%.*]] = call i8* @maybe_returned_ptr(i8* readonly [[PTR]]) #[[ATTR13]] +; TUNIT-NEXT: [[CALL_VAL:%.*]] = call i8 @maybe_returned_val(i8* readonly [[CALL_PTR]]) #[[ATTR13]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nounwind readonly +; CGSCC: Function Attrs: nounwind memory(read) ; CGSCC-LABEL: define {{[^@]+}}@ptr_uses -; CGSCC-SAME: (i8* nocapture readonly [[PTR:%.*]]) #[[ATTR12]] { -; CGSCC-NEXT: [[CALL_PTR:%.*]] = call i8* 
@maybe_returned_ptr(i8* readonly [[PTR]]) #[[ATTR12]] -; CGSCC-NEXT: [[CALL_VAL:%.*]] = call i8 @maybe_returned_val(i8* readonly [[CALL_PTR]]) #[[ATTR12]] +; CGSCC-SAME: (i8* nocapture readonly [[PTR:%.*]]) #[[ATTR12:[0-9]+]] { +; CGSCC-NEXT: [[CALL_PTR:%.*]] = call i8* @maybe_returned_ptr(i8* readonly [[PTR]]) #[[ATTR14]] +; CGSCC-NEXT: [[CALL_VAL:%.*]] = call i8 @maybe_returned_val(i8* readonly [[CALL_PTR]]) #[[ATTR14]] ; CGSCC-NEXT: ret void ; %call_ptr = call i8* @maybe_returned_ptr(i8* %ptr) @@ -410,7 +410,7 @@ define void @ptr_use_chain(i8* %ptr) { @constant_mem = external dso_local constant i32, align 4 define i32 @read_only_constant_mem() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@read_only_constant_mem ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: [[L:%.*]] = load i32, i32* @constant_mem, align 4 @@ -420,39 +420,34 @@ define i32 @read_only_constant_mem() { ret i32 %l } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { readonly } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nounwind readonly } -; TUNIT: attributes #[[ATTR8]] = { argmemonly nounwind } -; TUNIT: attributes #[[ATTR9]] = { argmemonly nofree norecurse nounwind willreturn } -; TUNIT: attributes #[[ATTR10]] = { readnone } -; TUNIT: attributes #[[ATTR11]] = { nounwind readonly } -; TUNIT: attributes #[[ATTR12]] = { willreturn writeonly } -; TUNIT: attributes #[[ATTR13]] = { readonly willreturn } -; TUNIT: attributes #[[ATTR14]] = { nounwind } -; TUNIT: attributes #[[ATTR15]] = { nounwind writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { memory(read) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR7]] = { nounwind memory(argmem: read) } +; TUNIT: attributes #[[ATTR8]] = { nounwind memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR9]] = { nofree 
norecurse nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR10]] = { memory(none) } +; TUNIT: attributes #[[ATTR11]] = { nounwind memory(read) } +; TUNIT: attributes #[[ATTR12]] = { willreturn } +; TUNIT: attributes #[[ATTR13]] = { nounwind } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { readonly } -; CGSCC: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR8]] = { argmemonly nounwind readonly } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nounwind } -; CGSCC: attributes #[[ATTR10]] = { argmemonly nofree norecurse nounwind willreturn } -; CGSCC: attributes #[[ATTR11]] = { readnone } -; CGSCC: attributes #[[ATTR12]] = { nounwind readonly } -; CGSCC: attributes #[[ATTR13]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR14]] = { willreturn writeonly } -; CGSCC: attributes #[[ATTR15]] = { readonly willreturn } -; CGSCC: attributes #[[ATTR16]] = { nounwind } -; CGSCC: attributes #[[ATTR17]] = { nounwind writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { memory(read) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn 
memory(write) } +; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR8]] = { nounwind memory(argmem: read) } +; CGSCC: attributes #[[ATTR9]] = { nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR10]] = { nofree norecurse nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR11]] = { memory(none) } +; CGSCC: attributes #[[ATTR12]] = { nounwind memory(read) } +; CGSCC: attributes #[[ATTR13]] = { willreturn } +; CGSCC: attributes #[[ATTR14]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/returned.ll b/llvm/test/Transforms/Attributor/returned.ll index 5e65e826f96cc..555ffe113c3d0 100644 --- a/llvm/test/Transforms/Attributor/returned.ll +++ b/llvm/test/Transforms/Attributor/returned.ll @@ -41,7 +41,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: @[[_ZTI1Y:[a-zA-Z0-9_$"\\.-]+]] = external dso_local constant { i8*, i8*, i8* }, align 8 ;. 
define i32 @sink_r0(i32 %r) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@sink_r0 ; CHECK-SAME: (i32 returned [[R:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -52,14 +52,14 @@ entry: } define i32 @scc_r1(i32 %a, i32 %r, i32 %b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@scc_r1 ; TUNIT-SAME: (i32 [[A:%.*]], i32 returned [[R:%.*]], i32 [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[A]], i32 [[R]]) #[[ATTR10:[0-9]+]] ; TUNIT-NEXT: ret i32 [[R]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@scc_r1 ; CGSCC-SAME: (i32 [[A:%.*]], i32 returned [[R:%.*]], i32 [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -73,7 +73,7 @@ entry: } define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@scc_r2 ; TUNIT-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 returned [[R:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -108,7 +108,7 @@ define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] ; TUNIT-NEXT: ret i32 [[R]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@scc_r2 ; CGSCC-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 returned 
[[R:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -188,7 +188,7 @@ return: ; preds = %cond.end, %if.then3 } define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@scc_rX ; TUNIT-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[R:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -223,7 +223,7 @@ define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[B]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] ; TUNIT-NEXT: ret i32 [[RETVAL_0]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@scc_rX ; CGSCC-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[R:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -325,7 +325,7 @@ return: ; preds = %cond.end, %if.then3 ; return a == b ? 
r : ptr_scc_r2(a, b, r); ; } define double* @ptr_sink_r0(double* %r) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ptr_sink_r0 ; CHECK-SAME: (double* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -336,14 +336,14 @@ entry: } define double* @ptr_scc_r1(double* %a, double* %r, double* %b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@ptr_scc_r1 ; TUNIT-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[R]], double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR10]] ; TUNIT-NEXT: ret double* [[R]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@ptr_scc_r1 ; CGSCC-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nofree readnone returned [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -357,7 +357,7 @@ entry: } define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@ptr_scc_r2 ; TUNIT-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nocapture nofree readnone [[B:%.*]], double* 
nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -392,7 +392,7 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; TUNIT-NEXT: [[RETVAL_0:%.*]] = phi double* [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ] ; TUNIT-NEXT: ret double* [[R]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@ptr_scc_r2 ; CGSCC-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nocapture nofree readnone [[B:%.*]], double* nofree readnone returned [[R:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -479,18 +479,18 @@ return: ; preds = %cond.end, %if.then3 ; } ; define i32* @rt0(i32* %a) #0 { -; TUNIT: Function Attrs: argmemonly nofree noinline nosync nounwind readonly uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(argmem: read) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt0 ; TUNIT-SAME: (i32* nofree noundef nonnull readonly returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @rt0(i32* nofree noundef nonnull readonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR11:[0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @rt0(i32* nofree noundef nonnull readonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret i32* [[A]] ; -; CGSCC: Function Attrs: argmemonly nofree noinline nosync nounwind readonly uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(argmem: read) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt0 ; CGSCC-SAME: (i32* nofree noundef nonnull readonly returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @rt0(i32* nofree noundef nonnull 
readonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR9:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @rt0(i32* nofree noundef nonnull readonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR7]] ; CGSCC-NEXT: ret i32* [[A]] ; entry: @@ -508,13 +508,13 @@ entry: ; } ; define i32* @rt1(i32* %a) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt1 ; TUNIT-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt1 ; CGSCC-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -531,14 +531,14 @@ entry: ; TEST another SCC test ; define i32* @rt2_helper(i32* %a) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt2_helper ; TUNIT-SAME: (i32* nofree readnone returned [[A:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL:%.*]] = call i32* @rt2(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[A]]) #[[ATTR10]] ; TUNIT-NEXT: ret i32* [[A]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt2_helper ; CGSCC-SAME: (i32* nofree readnone returned [[A:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -551,7 +551,7 @@ entry: } define i32* @rt2(i32* %a, i32 *%b) #0 { -; 
TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt2 ; TUNIT-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -564,7 +564,7 @@ define i32* @rt2(i32* %a, i32 *%b) #0 { ; TUNIT-NEXT: [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[A]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* [[SEL]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt2 ; CGSCC-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -593,14 +593,14 @@ if.end: ; TEST another SCC test ; define i32* @rt3_helper(i32* %a, i32* %b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt3_helper ; TUNIT-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[CALL:%.*]] = call i32* @rt3(i32* noalias nocapture nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR10]] ; TUNIT-NEXT: ret i32* [[B]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt3_helper ; CGSCC-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -613,7 +613,7 @@ entry: } define i32* @rt3(i32* %a, i32 *%b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; 
TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@rt3 ; TUNIT-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -626,7 +626,7 @@ define i32* @rt3(i32* %a, i32 *%b) #0 { ; TUNIT-NEXT: [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[IF_THEN]] ] ; TUNIT-NEXT: ret i32* [[B]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@rt3 ; CGSCC-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -667,13 +667,13 @@ define i32* @calls_unknown_fn(i32* %r) #0 { ; TUNIT: Function Attrs: noinline nounwind uwtable ; TUNIT-LABEL: define {{[^@]+}}@calls_unknown_fn ; TUNIT-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR5:[0-9]+]] { -; TUNIT-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR11:[0-9]+]] ; TUNIT-NEXT: ret i32* [[R]] ; ; CGSCC: Function Attrs: noinline nounwind uwtable ; CGSCC-LABEL: define {{[^@]+}}@calls_unknown_fn ; CGSCC-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR4:[0-9]+]] { -; CGSCC-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR10:[0-9]+]] +; CGSCC-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR8]] ; CGSCC-NEXT: ret i32* [[R]] ; tail call void @unknown_fn(i32* (i32*)* nonnull @calls_unknown_fn) @@ -716,14 +716,14 @@ define i32* @calls_maybe_redefined_fn(i32* %r) #0 { ; TUNIT-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn ; TUNIT-SAME: (i32* returned 
[[R:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR12]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR11]] ; TUNIT-NEXT: ret i32* [[R]] ; ; CGSCC: Function Attrs: noinline nounwind uwtable ; CGSCC-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn ; CGSCC-SAME: (i32* returned [[R:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR8]] ; CGSCC-NEXT: ret i32* [[R]] ; entry: @@ -765,14 +765,14 @@ define i32* @calls_maybe_redefined_fn2(i32* %r) #0 { ; TUNIT-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn2 ; TUNIT-SAME: (i32* [[R:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: entry: -; TUNIT-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR12]] +; TUNIT-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR11]] ; TUNIT-NEXT: ret i32* [[CALL]] ; ; CGSCC: Function Attrs: noinline nounwind uwtable ; CGSCC-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn2 ; CGSCC-SAME: (i32* [[R:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: entry: -; CGSCC-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR10]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR8]] ; CGSCC-NEXT: ret i32* [[CALL]] ; entry: @@ -791,7 +791,7 @@ entry: ; } ; define double @select_and_phi(double %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@select_and_phi ; CHECK-SAME: (double returned [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -827,7 +827,7 @@ if.end: ; preds = %if.then, %entry ; } ; define double @recursion_select_and_phi(i32 %a, double %b) #0 { -; TUNIT: Function Attrs: nofree noinline nosync 
nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@recursion_select_and_phi ; TUNIT-SAME: (i32 [[A:%.*]], double returned [[B:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -840,7 +840,7 @@ define double @recursion_select_and_phi(i32 %a, double %b) #0 { ; TUNIT: if.end: ; TUNIT-NEXT: ret double [[B]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@recursion_select_and_phi ; CGSCC-SAME: (i32 [[A:%.*]], double returned [[B:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -877,7 +877,7 @@ if.end: ; preds = %if.then, %entry ; } ; define double* @bitcast(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@bitcast ; CHECK-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -900,7 +900,7 @@ entry: ; } ; define double* @bitcasts_select_and_phi(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@bitcasts_select_and_phi ; CHECK-SAME: (i32* nofree readnone [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -950,7 +950,7 @@ if.end: ; preds = %if.then, %entry ; } ; define double* @ret_arg_arg_undef(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ret_arg_arg_undef ; CHECK-SAME: (i32* nofree readnone [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -998,7 
+998,7 @@ ret_undef: ; } ; define double* @ret_undef_arg_arg(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ret_undef_arg_arg ; CHECK-SAME: (i32* nofree readnone [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1046,7 +1046,7 @@ ret_arg1: ; } ; define double* @ret_undef_arg_undef(i32* %b) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ret_undef_arg_undef ; CHECK-SAME: (i32* nofree readnone [[B:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1179,7 +1179,7 @@ r: ; TEST inconsistent IR in dead code. ; define i32 @deadblockcall1(i32 %A) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@deadblockcall1 ; CHECK-SAME: (i32 returned [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1197,7 +1197,7 @@ unreachableblock: declare i32 @deadblockcall_helper(i32 returned %A); define i32 @deadblockcall2(i32 %A) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@deadblockcall2 ; CHECK-SAME: (i32 returned [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1218,7 +1218,7 @@ unreachableblock2: } define i32 @deadblockphi1(i32 %A) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define 
{{[^@]+}}@deadblockphi1 ; CHECK-SAME: (i32 returned [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1244,7 +1244,7 @@ r: } define i32 @deadblockphi2(i32 %A) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@deadblockphi2 ; CHECK-SAME: (i32 returned [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: @@ -1410,7 +1410,7 @@ define i32 @exact(i32* align 8 %a, i32* align 8 %b) { @G = external global i8 define i32* @ret_const() #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@ret_const ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i32* bitcast (i8* @G to i32*) @@ -1419,30 +1419,30 @@ define i32* @ret_const() #0 { ret i32* %bc } define i32* @use_const() #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@use_const ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32* bitcast (i8* @G to i32*) ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@use_const ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull dereferenceable(1) i32* @ret_const() #[[ATTR11:[0-9]+]] +; CGSCC-NEXT: [[C:%.*]] = call noundef nonnull dereferenceable(1) i32* @ret_const() #[[ATTR9:[0-9]+]] ; CGSCC-NEXT: ret i32* [[C]] ; %c = call i32* @ret_const() ret i32* %c } define i32* @dont_use_const() #0 { -; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable 
+; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@dont_use_const ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32* bitcast (i8* @G to i32*) ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone willreturn uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind willreturn memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@dont_use_const ; CGSCC-SAME: () #[[ATTR3]] { -; CGSCC-NEXT: [[C:%.*]] = musttail call noundef nonnull dereferenceable(1) i32* @ret_const() #[[ATTR11]] +; CGSCC-NEXT: [[C:%.*]] = musttail call noundef nonnull dereferenceable(1) i32* @ret_const() #[[ATTR9]] ; CGSCC-NEXT: ret i32* [[C]] ; %c = musttail call i32* @ret_const() @@ -1494,31 +1494,27 @@ declare dso_local i8* @__dynamic_cast(i8*, i8*, i8*, i64) attributes #0 = { noinline nounwind uwtable } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree noinline nosync nounwind readonly uwtable } -; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind readnone willreturn uwtable } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR3]] = { nofree noinline nosync nounwind memory(argmem: read) uwtable } +; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } ; TUNIT: attributes #[[ATTR5]] = { noinline nounwind uwtable } ; TUNIT: attributes #[[ATTR6]] = { noinline norecurse 
nounwind uwtable } ; TUNIT: attributes #[[ATTR7]] = { noreturn } ; TUNIT: attributes #[[ATTR8]] = { norecurse } -; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind readnone } -; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind readonly } -; TUNIT: attributes #[[ATTR12]] = { nounwind } -; TUNIT: attributes #[[ATTR13:[0-9]+]] = { nounwind readnone } +; TUNIT: attributes #[[ATTR9:[0-9]+]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR10]] = { nofree nosync nounwind } +; TUNIT: attributes #[[ATTR11]] = { nounwind } ;. -; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree noinline nosync nounwind readonly uwtable } -; CGSCC: attributes #[[ATTR3]] = { nofree noinline nosync nounwind readnone willreturn uwtable } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind memory(none) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline nosync nounwind memory(argmem: read) uwtable } +; CGSCC: attributes #[[ATTR3]] = { nofree noinline nosync nounwind willreturn memory(none) uwtable } ; CGSCC: attributes #[[ATTR4]] = { noinline nounwind uwtable } ; CGSCC: attributes #[[ATTR5]] = { noreturn } -; CGSCC: attributes #[[ATTR6:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR7]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR8]] = { nounwind readnone } -; CGSCC: attributes #[[ATTR9]] = { nofree nosync nounwind readonly } -; CGSCC: attributes #[[ATTR10]] = { nounwind } -; CGSCC: attributes #[[ATTR11]] = { readnone willreturn } +; CGSCC: 
attributes #[[ATTR6:[0-9]+]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR7]] = { nofree nosync nounwind } +; CGSCC: attributes #[[ATTR8]] = { nounwind } +; CGSCC: attributes #[[ATTR9]] = { willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/undefined_behavior.ll b/llvm/test/Transforms/Attributor/undefined_behavior.ll index a54b067acb0fa..23384c705debc 100644 --- a/llvm/test/Transforms/Attributor/undefined_behavior.ll +++ b/llvm/test/Transforms/Attributor/undefined_behavior.ll @@ -11,7 +11,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; -- Load tests -- define void @load_wholly_unreachable() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@load_wholly_unreachable ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: unreachable @@ -21,7 +21,7 @@ define void @load_wholly_unreachable() { } define void @loads_wholly_unreachable() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@loads_wholly_unreachable ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: unreachable @@ -33,7 +33,7 @@ define void @loads_wholly_unreachable() { define void @load_single_bb_unreachable(i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@load_single_bb_unreachable ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -53,7 +53,7 @@ e: ; Note that while the load is removed (because it's unused), the block ; is not changed to unreachable define void @load_null_pointer_is_defined() null_pointer_is_valid { -; CHECK: Function Attrs: nofree 
norecurse nosync nounwind null_pointer_is_valid readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@load_null_pointer_is_defined ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret void @@ -63,7 +63,7 @@ define void @load_null_pointer_is_defined() null_pointer_is_valid { } define internal i32* @ret_null() { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ret_null ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: ret i32* null @@ -72,12 +72,12 @@ define internal i32* @ret_null() { } define void @load_null_propagated() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@load_null_propagated ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@load_null_propagated ; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: ret void @@ -90,7 +90,7 @@ define void @load_null_propagated() { ; -- Store tests -- define void @store_wholly_unreachable() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@store_wholly_unreachable ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: unreachable @@ -100,13 +100,13 @@ define void @store_wholly_unreachable() { } define void @store_wholly_unreachable_volatile() { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define 
{{[^@]+}}@store_wholly_unreachable_volatile ; TUNIT-SAME: () #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: store volatile i32 5, i32* null, align 4294967296 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@store_wholly_unreachable_volatile ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: store volatile i32 5, i32* null, align 4294967296 @@ -117,7 +117,7 @@ define void @store_wholly_unreachable_volatile() { } define void @store_single_bb_unreachable(i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@store_single_bb_unreachable ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -135,13 +135,13 @@ e: } define void @store_null_pointer_is_defined() null_pointer_is_valid { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@store_null_pointer_is_defined ; TUNIT-SAME: () #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: store i32 5, i32* null, align 4294967296 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@store_null_pointer_is_defined ; CGSCC-SAME: () #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: store i32 5, i32* null, align 4294967296 @@ -155,12 +155,12 @@ define void @store_null_propagated() { ; ATTRIBUTOR-LABEL: @store_null_propagated( ; ATTRIBUTOR-NEXT: unreachable ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@store_null_propagated ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@store_null_propagated ; CGSCC-SAME: () #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: [[PTR:%.*]] = call noalias align 4294967296 i32* @ret_null() #[[ATTR10:[0-9]+]] @@ -174,12 +174,12 @@ define void @store_null_propagated() { ; -- AtomicRMW tests -- define void @atomicrmw_wholly_unreachable() { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomicrmw_wholly_unreachable ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@atomicrmw_wholly_unreachable ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: unreachable @@ -189,7 +189,7 @@ define void @atomicrmw_wholly_unreachable() { } define void @atomicrmw_single_bb_unreachable(i1 %cond) { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomicrmw_single_bb_unreachable ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -198,7 +198,7 @@ define void @atomicrmw_single_bb_unreachable(i1 %cond) { ; TUNIT: e: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@atomicrmw_single_bb_unreachable ; CGSCC-SAME: (i1 
[[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -236,7 +236,7 @@ define void @atomicrmw_null_propagated() { ; ATTRIBUTOR-LABEL: @atomicrmw_null_propagated( ; ATTRIBUTOR-NEXT: unreachable ; -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomicrmw_null_propagated ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: unreachable @@ -256,12 +256,12 @@ define void @atomicrmw_null_propagated() { ; -- AtomicCmpXchg tests -- define void @atomiccmpxchg_wholly_unreachable() { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomiccmpxchg_wholly_unreachable ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@atomiccmpxchg_wholly_unreachable ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: unreachable @@ -271,7 +271,7 @@ define void @atomiccmpxchg_wholly_unreachable() { } define void @atomiccmpxchg_single_bb_unreachable(i1 %cond) { -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomiccmpxchg_single_bb_unreachable ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -280,7 +280,7 @@ define void @atomiccmpxchg_single_bb_unreachable(i1 %cond) { ; TUNIT: e: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@atomiccmpxchg_single_bb_unreachable ; CGSCC-SAME: (i1 
[[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: br i1 [[COND]], label [[T:%.*]], label [[E:%.*]] @@ -318,7 +318,7 @@ define void @atomiccmpxchg_null_propagated() { ; ATTRIBUTOR-LABEL: @atomiccmpxchg_null_propagated( ; ATTRIBUTOR-NEXT: unreachable ; -; TUNIT: Function Attrs: nofree norecurse nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@atomiccmpxchg_null_propagated ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: unreachable @@ -340,7 +340,7 @@ define void @atomiccmpxchg_null_propagated() { ; Note: The unreachable on %t and %e is _not_ from AAUndefinedBehavior define i32 @cond_br_on_undef() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cond_br_on_undef ; TUNIT-SAME: () #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: unreachable @@ -349,7 +349,7 @@ define i32 @cond_br_on_undef() { ; TUNIT: e: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cond_br_on_undef ; CGSCC-SAME: () #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: unreachable @@ -369,7 +369,7 @@ e: ; Valid branch - verify that this is not converted ; to unreachable. 
define void @cond_br_on_undef2(i1 %cond) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cond_br_on_undef2 ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: br i1 [[COND]], label [[T1:%.*]], label [[E1:%.*]] @@ -394,7 +394,7 @@ e1: } define i1 @ret_undef() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ret_undef ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i1 undef @@ -403,7 +403,7 @@ define i1 @ret_undef() { } define void @cond_br_on_undef_interproc() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cond_br_on_undef_interproc ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: unreachable @@ -412,7 +412,7 @@ define void @cond_br_on_undef_interproc() { ; TUNIT: e: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cond_br_on_undef_interproc ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[COND:%.*]] = call i1 @ret_undef() #[[ATTR10]] @@ -431,7 +431,7 @@ e: } define i1 @ret_undef2() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ret_undef2 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: br i1 true, label [[T:%.*]], label [[E:%.*]] @@ -449,7 +449,7 @@ e: ; More complicated interproc deduction of undef define void @cond_br_on_undef_interproc2() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone 
willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cond_br_on_undef_interproc2 ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: unreachable @@ -458,7 +458,7 @@ define void @cond_br_on_undef_interproc2() { ; TUNIT: e: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cond_br_on_undef_interproc2 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[COND:%.*]] = call i1 @ret_undef2() #[[ATTR10]] @@ -479,7 +479,7 @@ e: ; Branch on undef that depends on propagation of ; undef of a previous instruction. define i32 @cond_br_on_undef3() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@cond_br_on_undef3 ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 1, undef @@ -500,7 +500,7 @@ e: ; Branch on undef because of uninitialized value. ; FIXME: Currently it doesn't propagate the undef. 
define i32 @cond_br_on_undef_uninit() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@cond_br_on_undef_uninit ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: unreachable @@ -509,7 +509,7 @@ define i32 @cond_br_on_undef_uninit() { ; TUNIT: e: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cond_br_on_undef_uninit ; CGSCC-SAME: () #[[ATTR8]] { ; CGSCC-NEXT: unreachable @@ -533,7 +533,7 @@ e: ; MODULE-NOT: @callee( define internal i32 @callee(i1 %C, i32* %A) { ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callee ; CGSCC-SAME: () #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -555,12 +555,12 @@ F: } define i32 @foo() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[X:%.*]] = call noundef i32 @callee() #[[ATTR10]] @@ -575,13 +575,13 @@ define i32 @foo() { ; Tests for argument position define void @arg_nonnull_1(i32* nonnull %a) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_1 ; 
TUNIT-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: store i32 0, i32* [[A]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_1 ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: store i32 0, i32* [[A]], align 4 @@ -592,13 +592,13 @@ define void @arg_nonnull_1(i32* nonnull %a) { } define void @arg_nonnull_1_noundef_1(i32* nonnull noundef %a) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_1_noundef_1 ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: store i32 0, i32* [[A]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_1_noundef_1 ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: store i32 0, i32* [[A]], align 4 @@ -609,7 +609,7 @@ define void @arg_nonnull_1_noundef_1(i32* nonnull noundef %a) { } define void @arg_nonnull_12(i32* nonnull %a, i32* nonnull %b, i32* %c) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_12 ; TUNIT-SAME: (i32* nocapture nofree nonnull writeonly 
[[A:%.*]], i32* nocapture nofree nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null @@ -623,7 +623,7 @@ define void @arg_nonnull_12(i32* nonnull %a, i32* nonnull %b, i32* %c) { ; TUNIT: ret: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_12 ; CGSCC-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null @@ -650,7 +650,7 @@ ret: } define void @arg_nonnull_12_noundef_2(i32* nonnull %a, i32* noundef nonnull %b, i32* %c) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_12_noundef_2 ; TUNIT-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree noundef nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) #[[ATTR6]] { ; TUNIT-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null @@ -664,7 +664,7 @@ define void @arg_nonnull_12_noundef_2(i32* nonnull %a, i32* noundef nonnull %b, ; TUNIT: ret: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_12_noundef_2 ; CGSCC-SAME: (i32* nocapture nofree nonnull writeonly [[A:%.*]], i32* nocapture nofree noundef nonnull writeonly [[B:%.*]], i32* nofree writeonly [[C:%.*]]) #[[ATTR9]] { ; CGSCC-NEXT: [[D:%.*]] = icmp eq i32* [[C]], null @@ -692,12 +692,12 @@ ret: ; Pass null directly to argument with nonnull attribute 
define void @arg_nonnull_violation1_1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -707,12 +707,12 @@ define void @arg_nonnull_violation1_1() { } define void @arg_nonnull_violation1_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation1_2 ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation1_2 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -723,12 +723,12 @@ define void @arg_nonnull_violation1_2() { ; A case that depends on value simplification define void @arg_nonnull_violation2_1(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -740,12 +740,12 @@ define void @arg_nonnull_violation2_1(i1 %c) { } 
define void @arg_nonnull_violation2_2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation2_2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation2_2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -758,7 +758,7 @@ define void @arg_nonnull_violation2_2(i1 %c) { ; Cases for single and multiple violation at a callsite define void @arg_nonnull_violation3_1(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation3_1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[PTR:%.*]] = alloca i32, align 4 @@ -772,7 +772,7 @@ define void @arg_nonnull_violation3_1(i1 %c) { ; TUNIT: ret: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation3_1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[PTR:%.*]] = alloca i32, align 4 @@ -805,7 +805,7 @@ ret: } define void @arg_nonnull_violation3_2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@arg_nonnull_violation3_2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: [[PTR:%.*]] = alloca i32, align 4 @@ -819,7 +819,7 @@ define void @arg_nonnull_violation3_2(i1 %c) { ; TUNIT: ret: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree 
nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@arg_nonnull_violation3_2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[PTR:%.*]] = alloca i32, align 4 @@ -854,7 +854,7 @@ ret: ; Tests for returned position define nonnull i32* @returned_nonnnull(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@returned_nonnnull ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: switch i32 [[C]], label [[ONDEFAULT:%.*]] [ @@ -881,7 +881,7 @@ ondefault: } define noundef i32* @returned_noundef(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@returned_noundef ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: switch i32 [[C]], label [[ONDEFAULT:%.*]] [ @@ -908,7 +908,7 @@ ondefault: } define nonnull noundef i32* @returned_nonnnull_noundef(i32 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@returned_nonnnull_noundef ; CHECK-SAME: (i32 [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: switch i32 [[C]], label [[ONDEFAULT:%.*]] [ @@ -935,7 +935,7 @@ ondefault: } define noundef i32 @returned_nonnnull_noundef_int() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@returned_nonnnull_noundef_int ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i32 0 @@ -965,7 +965,7 @@ define void @callsite_noundef_2() { } define i32 @argument_noundef1(i32 noundef %c) { -; CHECK: Function Attrs: nofree norecurse 
nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@argument_noundef1 ; CHECK-SAME: (i32 noundef returned [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i32 [[C]] @@ -974,12 +974,12 @@ define i32 @argument_noundef1(i32 noundef %c) { } define i32 @violate_noundef_nonpointer() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@violate_noundef_nonpointer ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32 undef ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@violate_noundef_nonpointer ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: unreachable @@ -989,7 +989,7 @@ define i32 @violate_noundef_nonpointer() { } define i32* @argument_noundef2(i32* noundef %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@argument_noundef2 ; CHECK-SAME: (i32* nofree noundef readnone returned "no-capture-maybe-returned" [[C:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret i32* [[C]] @@ -998,12 +998,12 @@ define i32* @argument_noundef2(i32* noundef %c) { } define i32* @violate_noundef_pointer() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@violate_noundef_pointer ; TUNIT-SAME: () #[[ATTR0]] { ; TUNIT-NEXT: ret i32* undef ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@violate_noundef_pointer ; CGSCC-SAME: () #[[ATTR2]] { ; 
CGSCC-NEXT: ret i32* undef @@ -1013,7 +1013,7 @@ define i32* @violate_noundef_pointer() { } define internal noundef i32 @assumed_undef_is_ok(i1 %c, i32 %arg) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@assumed_undef_is_ok ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: br i1 [[C]], label [[REC:%.*]], label [[RET:%.*]] @@ -1035,12 +1035,12 @@ ret: } define noundef i32 @assumed_undef_is_ok_caller(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@assumed_undef_is_ok_caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@assumed_undef_is_ok_caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @assumed_undef_is_ok(i1 [[C]]) #[[ATTR10]] @@ -1051,25 +1051,25 @@ define noundef i32 @assumed_undef_is_ok_caller(i1 %c) { } ;. 
-; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(write) } ; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nounwind null_pointer_is_valid willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR6]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn writeonly } -; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind null_pointer_is_valid willreturn memory(write) } +; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn memory(write) } ; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nounwind null_pointer_is_valid willreturn } ; CGSCC: attributes #[[ATTR7]] = { nofree nounwind willreturn } -; CGSCC: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR9]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR10]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR11]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR10]] = { willreturn } +; CGSCC: attributes #[[ATTR11]] = { nounwind willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-assume.ll b/llvm/test/Transforms/Attributor/value-simplify-assume.ll index 260626f383437..a44a5f618d4c8 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-assume.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-assume.ll @@ -7,7 +7,7 @@ declare void @useI1p(i1*) declare void @unknown() define i1 @readI1p(i1* %p) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@readI1p ; CHECK-SAME: (i1* nocapture nofree noundef nonnull readonly dereferenceable(1) [[P:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[L:%.*]] = load i1, i1* [[P]], align 1 @@ -37,13 +37,13 @@ define i1 @keep_assume_1c_nr() norecurse { } define i1 @drop_assume_1c_nr() norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1c_nr ; TUNIT-SAME: () #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR4:[0-9]+]] ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1c_nr ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR5:[0-9]+]] @@ -156,7 +156,7 @@ define i1 @keep_assume_1_nr(i1 %arg) norecurse { } define i1 @drop_assume_1_nr(i1 %arg) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1_nr ; TUNIT-SAME: 
(i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -164,7 +164,7 @@ define i1 @drop_assume_1_nr(i1 %arg) norecurse { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[ARG]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1_nr ; CGSCC-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -261,7 +261,7 @@ define i1 @keep_assume_4_nr(i1 %arg) norecurse { } define i1 @assume_1_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_1_nr ; TUNIT-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -275,7 +275,7 @@ define i1 @assume_1_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT: m: ; TUNIT-NEXT: ret i1 [[ARG]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_1_nr ; CGSCC-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -305,7 +305,7 @@ m: } define void @assume_1b_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_1b_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ 
-320,7 +320,7 @@ define void @assume_1b_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT: m: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_1b_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -351,7 +351,7 @@ m: } define i1 @assume_2_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_2_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -368,7 +368,7 @@ define i1 @assume_2_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[L]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_2_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -401,7 +401,7 @@ m: } define void @assume_2b_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_2b_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -418,7 +418,7 @@ define void @assume_2b_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] ; TUNIT-NEXT: ret 
void ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_2b_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -451,7 +451,7 @@ m: } define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_3_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -469,7 +469,7 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5:[0-9]+]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_3_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -484,7 +484,7 @@ define i1 @assume_3_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -504,7 +504,7 @@ m: } define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: 
inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_4_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -522,7 +522,7 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_4_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -537,7 +537,7 @@ define i1 @assume_4_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -557,7 +557,7 @@ m: } define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_5_nr ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -581,7 +581,7 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* 
noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_5_nr ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -602,7 +602,7 @@ define i1 @assume_5_nr(i1 %arg, i1 %cond) norecurse { ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -628,7 +628,7 @@ m: } define i1 @assume_5c_nr(i1 %cond) norecurse { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_5c_nr ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -652,7 +652,7 @@ define i1 @assume_5c_nr(i1 %cond) norecurse { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_5c_nr ; CGSCC-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -673,7 +673,7 @@ 
define i1 @assume_5c_nr(i1 %cond) norecurse { ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -717,13 +717,13 @@ define i1 @keep_assume_1c() { } define i1 @drop_assume_1c() { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1c ; TUNIT-SAME: () #[[ATTR3]] { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR4]] ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1c ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: call void @llvm.assume(i1 noundef true) #[[ATTR5]] @@ -825,7 +825,7 @@ define i1 @keep_assume_1(i1 %arg) { } define i1 @drop_assume_1(i1 %arg) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@drop_assume_1 ; TUNIT-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -833,7 +833,7 @@ define i1 @drop_assume_1(i1 %arg) { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[ARG]]) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[ARG]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function 
Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@drop_assume_1 ; CGSCC-SAME: (i1 returned [[ARG:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -925,7 +925,7 @@ define i1 @keep_assume_4(i1 %arg) { } define i1 @assume_1(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_1 ; TUNIT-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -939,7 +939,7 @@ define i1 @assume_1(i1 %arg, i1 %cond) { ; TUNIT: m: ; TUNIT-NEXT: ret i1 [[ARG]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_1 ; CGSCC-SAME: (i1 returned [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -969,7 +969,7 @@ m: } define void @assume_1b(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_1b ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -984,7 +984,7 @@ define void @assume_1b(i1 %arg, i1 %cond) { ; TUNIT: m: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_1b ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca 
i1, align 1 @@ -1015,7 +1015,7 @@ m: } define i1 @assume_2(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_2 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1032,7 +1032,7 @@ define i1 @assume_2(i1 %arg, i1 %cond) { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[L]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_2 ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1065,7 +1065,7 @@ m: } define void @assume_2b(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_2b ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1082,7 +1082,7 @@ define void @assume_2b(i1 %arg, i1 %cond) { ; TUNIT-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR4]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_2b ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1115,7 +1115,7 @@ m: } define i1 @assume_3(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind 
willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_3 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1133,7 +1133,7 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_3 ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1148,7 +1148,7 @@ define i1 @assume_3(i1 %arg, i1 %cond) { ; CGSCC-NEXT: store i1 false, i1* [[STACK]], align 1 ; CGSCC-NEXT: br label [[M]] ; CGSCC: m: -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1168,7 +1168,7 @@ m: } define i1 @assume_4(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_4 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1186,7 +1186,7 @@ define i1 @assume_4(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: 
inaccessiblememonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_4 ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1201,7 +1201,7 @@ define i1 @assume_4(i1 %arg, i1 %cond) { ; CGSCC: m: ; CGSCC-NEXT: [[L:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1221,7 +1221,7 @@ m: } define i1 @assume_5(i1 %arg, i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_5 ; TUNIT-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1245,7 +1245,7 @@ define i1 @assume_5(i1 %arg, i1 %cond) { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_5 ; CGSCC-SAME: (i1 [[ARG:%.*]], i1 [[COND:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1266,7 +1266,7 @@ define i1 @assume_5(i1 %arg, i1 %cond) { ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; 
CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1292,7 +1292,7 @@ m: } define i1 @assume_5c(i1 %cond) { -; TUNIT: Function Attrs: inaccessiblememonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@assume_5c ; TUNIT-SAME: (i1 [[COND:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1316,7 +1316,7 @@ define i1 @assume_5c(i1 %cond) { ; TUNIT-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@assume_5c ; CGSCC-SAME: (i1 [[COND:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i1, align 1 @@ -1337,7 +1337,7 @@ define i1 @assume_5c(i1 %cond) { ; CGSCC: m: ; CGSCC-NEXT: [[L4:%.*]] = load i1, i1* [[STACK]], align 1 ; CGSCC-NEXT: call void @llvm.assume(i1 noundef [[L4]]) #[[ATTR5]] -; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR6]] +; CGSCC-NEXT: [[R:%.*]] = call i1 @readI1p(i1* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[STACK]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[R]] ; %stack = alloca i1 @@ -1363,18 +1363,17 @@ m: } ;. 
-; TUNIT: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ; TUNIT: attributes #[[ATTR2]] = { norecurse } -; TUNIT: attributes #[[ATTR3]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; TUNIT: attributes #[[ATTR4]] = { willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind readonly willreturn } +; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } +; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } ; CGSCC: attributes #[[ATTR2]] = { norecurse } -; CGSCC: attributes #[[ATTR3]] = { inaccessiblememonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { inaccessiblememonly nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +; CGSCC: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CGSCC: attributes #[[ATTR5]] = { willreturn } -; CGSCC: attributes #[[ATTR6]] = { readonly willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-dbg.ll b/llvm/test/Transforms/Attributor/value-simplify-dbg.ll index 10a84d6458c67..f1e887d7fe83e 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-dbg.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-dbg.ll @@ -24,7 +24,7 @@ entry: declare void @use(i32 noundef) define void @src() norecurse !dbg !22 { -; CHECK: Function Attrs: norecurse nosync writeonly +; CHECK: Function Attrs: norecurse nosync memory(write) ; CHECK-LABEL: define {{[^@]+}}@src ; CHECK-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG22:![0-9]+]] { ; CHECK-NEXT: entry: @@ -73,8 +73,8 @@ declare i32 @speculatable() speculatable readnone !24 = !DILocation(line: 10, column: 7, scope: !22) !25 = !DILocation(line: 11, column: 1, scope: !22) ;. -; CHECK: attributes #[[ATTR0]] = { norecurse nosync writeonly } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { readnone speculatable } +; CHECK: attributes #[[ATTR0]] = { norecurse nosync memory(write) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { speculatable memory(none) } ;. 
; CHECK: [[DBG0]] = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) ; CHECK: [[META1:![0-9]+]] = distinct !DIGlobalVariable(name: "G", scope: !2, file: !5, line: 1, type: !6, isLocal: true, isDefinition: true) diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll index a2e4c3114e317..7ae99e48ff8e6 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -50,7 +50,7 @@ define internal void @level1Kernel(i32 %C) { ; TUNIT-NEXT: call void @level2Kernelb() #[[ATTR3]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: -; TUNIT-NEXT: call void @level2Kernelall_late() #[[ATTR5:[0-9]+]] +; TUNIT-NEXT: call void @level2Kernelall_late() #[[ATTR3]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: norecurse nosync nounwind @@ -67,7 +67,7 @@ define internal void @level1Kernel(i32 %C) { ; CGSCC-NEXT: call void @level2Kernelb() #[[ATTR4]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: -; CGSCC-NEXT: call void @level2Kernelall_late() #[[ATTR6:[0-9]+]] +; CGSCC-NEXT: call void @level2Kernelall_late() #[[ATTR4]] ; CGSCC-NEXT: ret void ; entry: @@ -89,7 +89,7 @@ if.end: ; preds = %if.else, %if.then } define internal void @level2Kernelall_early() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@level2Kernelall_early ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: @@ -110,7 +110,7 @@ define internal void @level2Kernela() { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR6:[0-9]+]] +; TUNIT-NEXT: call void @use(i32 noundef 
[[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5:[0-9]+]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -138,7 +138,7 @@ define internal void @level2Kernelb() { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR6]] +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -160,13 +160,13 @@ entry: } define internal void @level2Kernelall_late() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@level2Kernelall_late ; TUNIT-SAME: () #[[ATTR2]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@level2Kernelall_late ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -217,7 +217,7 @@ define internal void @level1(i32 %C) { ; TUNIT-NEXT: call void @level2b() #[[ATTR3]] ; TUNIT-NEXT: br label [[IF_END]] ; TUNIT: if.end: -; TUNIT-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR5]] +; TUNIT-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR3]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: norecurse nosync nounwind @@ -235,7 +235,7 @@ define internal void @level1(i32 %C) { ; CGSCC-NEXT: call void @level2b(i32* noalias nocapture nofree noundef nonnull readonly align 4 
dereferenceable(4) [[LOCAL]]) #[[ATTR4]] ; CGSCC-NEXT: br label [[IF_END]] ; CGSCC: if.end: -; CGSCC-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR6]] +; CGSCC-NEXT: call void @level2all_late(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[LOCAL]]) #[[ATTR4]] ; CGSCC-NEXT: ret void ; entry: @@ -258,14 +258,14 @@ if.end: ; preds = %if.else, %if.then } define internal void @level2all_early(i32* %addr) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@level2all_early ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@level2all_early ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -286,7 +286,7 @@ define internal void @level2a(i32* %addr) { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]] +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -314,7 +314,7 @@ define internal void @level2b(i32* %addr) { ; 
TUNIT-NEXT: entry: ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]] +; TUNIT-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR5]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nosync nounwind @@ -336,14 +336,14 @@ entry: } define internal void @level2all_late(i32* %addr) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@level2all_late ; TUNIT-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i32 1, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@level2all_late ; CGSCC-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -362,17 +362,15 @@ declare dso_local void @use(i32, i32, i32) nosync norecurse nounwind ;. 
; TUNIT: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; TUNIT: attributes #[[ATTR1]] = { norecurse nosync nounwind } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(write) } ; TUNIT: attributes #[[ATTR3]] = { nosync nounwind } -; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR5]] = { nosync nounwind writeonly } -; TUNIT: attributes #[[ATTR6]] = { nounwind } +; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR5]] = { nounwind } ;. ; CGSCC: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; CGSCC: attributes #[[ATTR1]] = { norecurse nosync nounwind } -; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(write) } ; CGSCC: attributes #[[ATTR3]] = { nosync nounwind } ; CGSCC: attributes #[[ATTR4]] = { nounwind } -; CGSCC: attributes #[[ATTR5]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR6]] = { nounwind writeonly } +; CGSCC: attributes #[[ATTR5]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-instances.ll b/llvm/test/Transforms/Attributor/value-simplify-instances.ll index 9de4fd59ec581..270673ba23c79 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-instances.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-instances.ll @@ -13,7 +13,7 @@ declare i1* @geti1Ptr() ; CHECK: @[[G3:[a-zA-Z0-9_$"\\.-]+]] = private global i1 undef ;. 
define internal i1 @recursive_inst_comparator(i1* %a, i1* %b) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@recursive_inst_comparator ; CHECK-SAME: (i1* noalias nofree readnone [[A:%.*]], i1* noalias nofree readnone [[B:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i1* [[A]], [[B]] @@ -103,17 +103,29 @@ define i1 @recursive_inst_compare_caller(i1 %c) { ; Make sure we do *not* return true. define internal i1 @recursive_alloca_compare(i1 %c, i1* %p) { -; CHECK: Function Attrs: nofree nosync nounwind readnone -; CHECK-LABEL: define {{[^@]+}}@recursive_alloca_compare -; CHECK-SAME: (i1 [[C:%.*]], i1* noalias nofree nonnull readnone [[P:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[A:%.*]] = alloca i1, align 1 -; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; CHECK: t: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i1* [[A]], [[P]] -; CHECK-NEXT: ret i1 [[CMP]] -; CHECK: f: -; CHECK-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 noundef true, i1* noalias nofree noundef nonnull readnone dereferenceable(1) [[A]]) #[[ATTR1]] -; CHECK-NEXT: ret i1 [[CALL]] +; TUNIT: Function Attrs: nofree nosync nounwind memory(none) +; TUNIT-LABEL: define {{[^@]+}}@recursive_alloca_compare +; TUNIT-SAME: (i1 [[C:%.*]], i1* noalias nofree nonnull readnone [[P:%.*]]) #[[ATTR1:[0-9]+]] { +; TUNIT-NEXT: [[A:%.*]] = alloca i1, align 1 +; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; TUNIT: t: +; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i1* [[A]], [[P]] +; TUNIT-NEXT: ret i1 [[CMP]] +; TUNIT: f: +; TUNIT-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 noundef true, i1* noalias nofree noundef nonnull readnone dereferenceable(1) [[A]]) #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: ret i1 [[CALL]] +; +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) +; CGSCC-LABEL: define 
{{[^@]+}}@recursive_alloca_compare +; CGSCC-SAME: (i1 [[C:%.*]], i1* noalias nofree nonnull readnone [[P:%.*]]) #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT: [[A:%.*]] = alloca i1, align 1 +; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CGSCC: t: +; CGSCC-NEXT: [[CMP:%.*]] = icmp eq i1* [[A]], [[P]] +; CGSCC-NEXT: ret i1 [[CMP]] +; CGSCC: f: +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 noundef true, i1* noalias nofree noundef nonnull readnone dereferenceable(1) [[A]]) #[[ATTR3:[0-9]+]] +; CGSCC-NEXT: ret i1 [[CALL]] ; %a = alloca i1 br i1 %c, label %t, label %f @@ -127,13 +139,13 @@ f: ; FIXME: This should *not* return true. define i1 @recursive_alloca_compare_caller(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { -; TUNIT-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 [[C]], i1* undef) #[[ATTR1]] +; TUNIT-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 [[C]], i1* undef) #[[ATTR4]] ; TUNIT-NEXT: ret i1 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 [[C]], i1* undef) #[[ATTR4:[0-9]+]] @@ -145,7 +157,7 @@ define i1 @recursive_alloca_compare_caller(i1 %c) { ; Make sure we do *not* simplify this to return 0 or 1, return 42 is ok though. 
define internal i8 @recursive_alloca_load_return(i1 %c, i8* %p, i8 %v) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@recursive_alloca_load_return ; TUNIT-SAME: (i1 [[C:%.*]], i8* nocapture nofree nonnull readonly [[P:%.*]], i8 noundef [[V:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -156,10 +168,10 @@ define internal i8 @recursive_alloca_load_return(i1 %c, i8* %p, i8 %v) { ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; TUNIT-NEXT: ret i8 [[L]] ; TUNIT: f: -; TUNIT-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef true, i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[A]], i8 noundef 1) #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef true, i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[A]], i8 noundef 1) #[[ATTR4]] ; TUNIT-NEXT: ret i8 [[CALL]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_load_return ; CGSCC-SAME: (i1 [[C:%.*]], i8* nocapture nofree nonnull readonly [[P:%.*]], i8 noundef [[V:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -170,7 +182,7 @@ define internal i8 @recursive_alloca_load_return(i1 %c, i8* %p, i8 %v) { ; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[P]], align 1 ; CGSCC-NEXT: ret i8 [[L]] ; CGSCC: f: -; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef true, i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[A]], i8 noundef 1) #[[ATTR3:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef true, i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[A]], i8 noundef 1) #[[ATTR3]] ; CGSCC-NEXT: ret i8 
[[CALL]] ; %a = alloca i8 @@ -186,16 +198,16 @@ f: } define i8 @recursive_alloca_load_return_caller(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@recursive_alloca_load_return_caller ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; TUNIT-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 [[C]], i8* undef, i8 noundef 42) #[[ATTR4]] ; TUNIT-NEXT: ret i8 [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_load_return_caller ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 [[C]], i8* undef, i8 noundef 42) #[[ATTR5:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 [[C]], i8* undef, i8 noundef 42) #[[ATTR4]] ; CGSCC-NEXT: ret i8 [[CALL]] ; %call = call i8 @recursive_alloca_load_return(i1 %c, i8* undef, i8 42) @@ -259,7 +271,7 @@ define i1 @recursive_alloca_compare_caller_global1(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller_global1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global1(i1 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global1(i1 [[C]]) #[[ATTR4]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_alloca_compare_global1(i1 %c) @@ -318,7 +330,7 @@ define i1 @recursive_alloca_compare_caller_global2(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller_global2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global2(i1 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 
@recursive_alloca_compare_global2(i1 [[C]]) #[[ATTR4]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_alloca_compare_global2(i1 %c) @@ -374,25 +386,24 @@ define i1 @recursive_inst_compare_caller_global3(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_inst_compare_caller_global3 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_inst_compare_global3(i1 [[C]]) #[[ATTR5]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_inst_compare_global3(i1 [[C]]) #[[ATTR4]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_inst_compare_global3(i1 %c) ret i1 %call } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind readnone } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree nosync nounwind } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind } -; TUNIT: attributes #[[ATTR6]] = { nounwind readnone } +; TUNIT: attributes #[[ATTR6]] = { nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind } -; CGSCC: attributes #[[ATTR4]] = { nounwind readnone } -; CGSCC: attributes #[[ATTR5]] = { nounwind } +; CGSCC: attributes #[[ATTR4]] = { nounwind } ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll index 51e7729ba54ea..dd4e2f68bb36f 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-local-remote.ll @@ -13,7 +13,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 %struct2 = type <{ ptr, i64, i64, i32, [4 x i8] }> define i64 @t1(ptr %first, ptr %first.addr, ptr %0) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t1 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FIRST:%.*]], ptr nocapture nofree readnone [[FIRST_ADDR:%.*]], ptr nocapture nofree readnone [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -24,7 +24,7 @@ define i64 @t1(ptr %first, ptr %first.addr, ptr %0) { ; TUNIT-NEXT: [[CALL:%.*]] = call ptr @foo.4(ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FIRST]]) #[[ATTR3:[0-9]+]] ; TUNIT-NEXT: ret i64 0 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; 
CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@t1 ; CGSCC-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FIRST:%.*]], ptr nocapture nofree readnone [[FIRST_ADDR:%.*]], ptr nocapture nofree readnone [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -49,7 +49,7 @@ if.end: ; preds = %entry } define internal ptr @foo.4(ptr %__first) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@foo.4 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[__FIRST:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -57,7 +57,7 @@ define internal ptr @foo.4(ptr %__first) { ; TUNIT-NEXT: store ptr [[__FIRST]], ptr [[__FIRST]], align 8 ; TUNIT-NEXT: ret ptr undef ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@foo.4 ; CGSCC-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[__FIRST:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: @@ -75,7 +75,7 @@ entry: } define internal ptr @bar(ptr %QQfirst) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (ptr noalias nofree noundef nonnull readnone returned align 8 dereferenceable(8) "no-capture-maybe-returned" [[QQFIRST:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -101,7 +101,7 @@ while.end: ; preds = %while.cond } define ptr @t2(ptr %this, ptr %this.addr, ptr %this1) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: 
readwrite) ; TUNIT-LABEL: define {{[^@]+}}@t2 ; TUNIT-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -110,12 +110,12 @@ define ptr @t2(ptr %this, ptr %this.addr, ptr %this1) { ; TUNIT-NEXT: [[TEST_RET:%.*]] = extractvalue [[S]] [[CALL]], 0 ; TUNIT-NEXT: ret ptr [[TEST_RET]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@t2 ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @foo.1(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR8:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @foo.1(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] ; CGSCC-NEXT: [[TEST_RET:%.*]] = extractvalue [[S]] [[CALL]], 0 ; CGSCC-NEXT: ret ptr [[TEST_RET]] ; @@ -128,23 +128,23 @@ entry: } define internal %S @foo.1(ptr %foo.this) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@foo.1 ; TUNIT-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[FOO_THIS:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 -; TUNIT-NEXT: call void @bar.2(ptr noalias nocapture nofree noundef nonnull writeonly align 8 [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) 
#[[ATTR5:[0-9]+]] +; TUNIT-NEXT: call void @bar.2(ptr noalias nocapture nofree noundef nonnull writeonly align 8 [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR4]] ; TUNIT-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; TUNIT-NEXT: ret [[S]] [[FOO_RET]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@foo.1 ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[FOO_THIS:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[FOO_THIS]], ptr [[FOO_THIS]], align 8 -; CGSCC-NEXT: call void @bar.2(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR6]] +; CGSCC-NEXT: call void @bar.2(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[FOO_THIS]]) #[[ATTR8:[0-9]+]] ; CGSCC-NEXT: [[FOO_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; CGSCC-NEXT: ret [[S]] [[FOO_RET]] ; @@ -157,20 +157,20 @@ entry: } define internal void @bar.2(ptr %bar.this, ptr %bar.data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@bar.2 ; TUNIT-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS:%.*]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store ptr [[BAR_DATA]], ptr [[BAR_THIS]], align 8 -; TUNIT-NEXT: call void @baz(ptr nocapture nofree noundef nonnull writeonly align 8 
dereferenceable(8) [[BAR_THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA]]) #[[ATTR5]] +; TUNIT-NEXT: call void @baz(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA]]) #[[ATTR4]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@bar.2 ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS:%.*]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: store ptr [[BAR_DATA]], ptr [[BAR_THIS]], align 8 -; CGSCC-NEXT: call void @baz(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA]]) #[[ATTR6]] +; CGSCC-NEXT: call void @baz(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAR_DATA]]) #[[ATTR8]] ; CGSCC-NEXT: ret void ; entry: @@ -180,14 +180,14 @@ entry: } define internal void @baz(ptr %baz.this, ptr %baz.data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@baz ; TUNIT-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAZ_THIS:%.*]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAZ_DATA:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store ptr [[BAZ_DATA]], ptr [[BAZ_THIS]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn 
writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@baz ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[BAZ_THIS:%.*]], ptr nofree writeonly [[BAZ_DATA:%.*]]) #[[ATTR3:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -200,7 +200,7 @@ entry: } define ptr @foo(ptr %this, ptr %this.addr, ptr %this1) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@foo ; TUNIT-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -209,12 +209,12 @@ define ptr @foo(ptr %this, ptr %this.addr, ptr %this1) { ; TUNIT-NEXT: [[FOO_RET:%.*]] = extractvalue [[S]] [[CALL]], 0 ; TUNIT-NEXT: ret ptr [[FOO_RET]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@foo ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @bar.5(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR8]] +; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @bar.5(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] ; CGSCC-NEXT: [[FOO_RET:%.*]] = extractvalue [[S]] [[CALL]], 0 ; CGSCC-NEXT: ret ptr [[FOO_RET]] ; @@ -227,7 +227,7 @@ entry: } define internal %S @bar.5(ptr %this) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function 
Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@bar.5 ; TUNIT-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -237,13 +237,13 @@ define internal %S @bar.5(ptr %this) { ; TUNIT-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; TUNIT-NEXT: ret [[S]] [[BAR_RET]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@bar.5 ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; CGSCC-NEXT: call void @baz.6(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR8]] +; CGSCC-NEXT: call void @baz.6(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR9:[0-9]+]] ; CGSCC-NEXT: [[BAR_RET:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; CGSCC-NEXT: ret [[S]] [[BAR_RET]] ; @@ -257,7 +257,7 @@ entry: } define internal void @baz.6(ptr %this, ptr %data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@baz.6 ; TUNIT-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -265,12 +265,12 @@ define internal void @baz.6(ptr %this, ptr %data) { ; TUNIT-NEXT: call void @boom(ptr nocapture nofree noundef nonnull 
writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA]]) #[[ATTR4]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@baz.6 ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: store ptr [[DATA]], ptr [[THIS]], align 8 -; CGSCC-NEXT: call void @boom(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA]]) #[[ATTR8]] +; CGSCC-NEXT: call void @boom(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA]]) #[[ATTR9]] ; CGSCC-NEXT: ret void ; entry: @@ -280,7 +280,7 @@ entry: } define internal void @boom(ptr %this, ptr %data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@boom ; TUNIT-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree noundef nonnull align 8 dereferenceable(8) [[DATA:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -290,7 +290,7 @@ define internal void @boom(ptr %this, ptr %data) { ; TUNIT-NEXT: store ptr [[V]], ptr [[THIS]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@boom ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree 
[[DATA:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -371,7 +371,7 @@ declare void @ext1(ptr) ; Taken from https://github.com/llvm/llvm-project/issues/54981 define dso_local void @spam() { -; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@spam ; TUNIT-SAME: () #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: bb: @@ -402,7 +402,7 @@ define dso_local void @spam() { ; TUNIT: bb35: ; TUNIT-NEXT: unreachable ; -; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CGSCC: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@spam ; CGSCC-SAME: () #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: bb: @@ -475,22 +475,22 @@ bb35: ; preds = %bb16 } define double @t4(ptr %this, ptr %this.addr, ptr %this1) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t4 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[THIS_ADDR1:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; TUNIT-NEXT: [[CALL:%.*]] = call [[S:%.*]] @t4a(ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR5]] +; TUNIT-NEXT: [[CALL:%.*]] = call [[S:%.*]] @t4a(ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]] ; TUNIT-NEXT: ret double 0.000000e+00 ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@t4 ; CGSCC-SAME: (ptr nofree noundef nonnull 
align 8 dereferenceable(8) [[THIS:%.*]], ptr nocapture nofree readnone [[THIS_ADDR:%.*]], ptr nocapture nofree readnone [[THIS1:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[THIS_ADDR1:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @t4a(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR8]] +; CGSCC-NEXT: [[CALL:%.*]] = call [[S:%.*]] @t4a(ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] ; CGSCC-NEXT: [[TMP0:%.*]] = extractvalue [[S]] [[CALL]], 0 ; CGSCC-NEXT: ret double 0.000000e+00 ; @@ -504,24 +504,24 @@ entry: } define internal %S @t4a(ptr %this) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t4a ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; TUNIT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; TUNIT-NEXT: call void @t4b(ptr noalias nocapture nofree noundef nonnull writeonly align 8 [[RETVAL]]) #[[ATTR5]] +; TUNIT-NEXT: call void @t4b(ptr noalias nocapture nofree noundef nonnull writeonly align 8 [[RETVAL]]) #[[ATTR4]] ; TUNIT-NEXT: ret [[S]] undef ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@t4a ; CGSCC-SAME: (ptr nofree noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[RETVAL:%.*]] = alloca [[S:%.*]], i32 0, align 8 ; CGSCC-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[THIS]], ptr [[THIS]], align 8 -; 
CGSCC-NEXT: call void @t4b(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR6]] +; CGSCC-NEXT: call void @t4b(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[RETVAL]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR8]] ; CGSCC-NEXT: [[TMP0:%.*]] = load [[S]], ptr [[RETVAL]], align 8 ; CGSCC-NEXT: ret [[S]] [[TMP0]] ; @@ -539,23 +539,23 @@ entry: } define internal void @t4b(ptr %this, ptr %data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t4b ; TUNIT-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 -; TUNIT-NEXT: call void @t4c(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR5]] +; TUNIT-NEXT: call void @t4c(ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]]) #[[ATTR4]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@t4b ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[DATA:%.*]]) #[[ATTR0]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; CGSCC-NEXT: store ptr [[DATA]], ptr [[THIS]], align 8 -; CGSCC-NEXT: call void 
@t4c(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[DATA]]) #[[ATTR6]] +; CGSCC-NEXT: call void @t4c(ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS]], ptr nofree noundef nonnull writeonly align 8 dereferenceable(8) [[DATA]]) #[[ATTR8]] ; CGSCC-NEXT: ret void ; entry: @@ -570,7 +570,7 @@ entry: } define internal void @t4c(ptr %this, ptr %data) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@t4c ; TUNIT-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] { ; TUNIT-NEXT: entry: @@ -578,7 +578,7 @@ define internal void @t4c(ptr %this, ptr %data) { ; TUNIT-NEXT: [[DATA_ADDR:%.*]] = alloca ptr, i32 0, align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@t4c ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[THIS:%.*]], ptr nofree writeonly [[DATA:%.*]]) #[[ATTR3]] { ; CGSCC-NEXT: entry: @@ -611,22 +611,22 @@ entry: !6 = !{i32 7, !"Dwarf Version", i32 2} !7 = !{i32 2, !"Debug Info Version", i32 3} ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind readnone } -; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR5]] = { nofree norecurse noreturn nosync nounwind readnone } -; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR7]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR8]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR3]] = { nofree norecurse 
nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5]] = { nofree norecurse noreturn nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR6]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR7]] = { willreturn } +; CGSCC: attributes #[[ATTR8]] = { nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR9]] = { nounwind willreturn memory(readwrite) } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 5]} ; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-struct.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-struct.ll index 28940d7328f3c..537dc304cb797 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-struct.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-struct.ll @@ -36,7 +36,7 @@ declare void @harmless_use(ptr nocapture readonly) nofree norecurse nosync nounw ; CHECK: @[[GLOBALS:[a-zA-Z0-9_$"\\.-]+]] = internal constant [[STRUCT_S:%.*]] { i32 42, double 3.140000e+00, ptr null, i32 0 }, align 8 ;. 
define i32 @testOneFieldGlobalS(i32 %cmpx) { -; CHECK: Function Attrs: nofree norecurse nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@testOneFieldGlobalS ; CHECK-SAME: (i32 [[CMPX:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -95,7 +95,7 @@ if.end7: ; preds = %if.then5, %if.end4 } define i32 @testOneFieldGlobalS_type_mismatch() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@testOneFieldGlobalS_type_mismatch ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: @@ -152,7 +152,7 @@ if.end7: ; preds = %if.then5, %if.end4 } define i32 @testOneFieldGlobalS_byte_offset_wrong() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@testOneFieldGlobalS_byte_offset_wrong ; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: @@ -210,7 +210,7 @@ if.end7: ; preds = %if.then5, %if.end4 ret i32 %r.2 } ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind readnone willreturn } -; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree norecurse nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn memory(none) } ;. 
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll index a8d1f549dfed4..55e314d310d09 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -57,7 +57,7 @@ ; CHECK: @[[GLOBAL:[a-zA-Z0-9_$"\\.-]+]] = internal global [[STRUCT_STY:%.*]] zeroinitializer, align 8 ;. define void @write_arg(i32* %p, i32 %v) { -; CHECK: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@write_arg ; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: @@ -103,7 +103,7 @@ declare i32 @random(...) ; return r; ; } define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) align 4 %agg.result) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1 ; TUNIT-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -135,7 +135,7 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig ; TUNIT-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR15]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1 ; CGSCC-SAME: (%struct.S* noalias nocapture nofree nonnull 
writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -538,7 +538,7 @@ for.end38: ; preds = %for.cond.cleanup30 ; } ; define i32 @local_alloca_simplifiable_3() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_3 ; CHECK-SAME: () #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -563,7 +563,7 @@ split: ; } ; define i32 @local_alloca_simplifiable_4() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_4 ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret i32 undef @@ -725,7 +725,7 @@ cond.end: ; preds = %cond.false, %cond.t ; } ; define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) align 4 %agg.result) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_1 ; TUNIT-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -832,7 +832,7 @@ entry: ; } ; define void @static_global_simplifiable_2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_2 ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: entry: @@ -1075,13 +1075,13 @@ for.end35: ; preds = %for.cond.cleanup27 ; return Flag3; ; } define i32 @static_global_simplifiable_3() { -; TUNIT: Function Attrs: nofree norecurse 
nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_3 ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: store i32 1, i32* @Flag3, align 4, !tbaa [[TBAA3]] ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_3 ; CGSCC-SAME: () #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: store i32 1, i32* @Flag3, align 4, !tbaa [[TBAA3]] @@ -1110,7 +1110,7 @@ define i32 @static_global_simplifiable_3() { ; } ; define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align 4 %agg.result, %struct.S* byval(%struct.S) align 8 %s) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1 ; TUNIT-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]], %struct.S* noalias nocapture nofree nonnull byval([[STRUCT_S]]) align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -1160,7 +1160,7 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align ; TUNIT-NEXT: store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1 ; CGSCC-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]], %struct.S* noalias nocapture nofree nonnull byval([[STRUCT_S]]) align 8 
dereferenceable(24) [[S:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: entry: @@ -1683,7 +1683,7 @@ join: ; preds = %right, %left ; We could simplify these if we separate accessed bins wrt. alignment (here mod 4). define i32 @unknown_access_mixed_simplifiable(i32 %arg1, i32 %arg2) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_simplifiable ; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1721,7 +1721,7 @@ entry: ; The access to bc4b could go anywhere, nothing is simplifiable. define i32 @unknown_access_mixed_not_simplifiable(i32 %arg1, i32 %arg2, i32 %arg3) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_not_simplifiable ; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -1777,14 +1777,14 @@ declare void @escape(i8*) ; } ; define i32 @global_not_simplifiable_1(i32 %cnd) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@global_not_simplifiable_1 ; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: [[I:%.*]] = load i32, i32* @Flag0, align 4, !tbaa [[TBAA3]] ; TUNIT-NEXT: ret i32 [[I]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@global_not_simplifiable_1 ; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -1891,13 +1891,13 @@ entry: ret i32 %i } define void @static_global_not_simplifiable_2_helper() { -; 
TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: store i32 2, i32* @Flag4, align 4, !tbaa [[TBAA3]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: store i32 2, i32* @Flag4, align 4, !tbaa [[TBAA3]] @@ -1964,13 +1964,13 @@ define i32 @write_read_global() { ret i32 %l } define void @write_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: store i32 7, i32* @Gint2, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, i32* @Gint2, align 4 @@ -1980,13 +1980,13 @@ define void @write_global() { ret void } define i32 @read_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@read_global ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* @Gint2, align 4 ; TUNIT-NEXT: ret i32 [[L]] ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define 
{{[^@]+}}@read_global ; CGSCC-SAME: () #[[ATTR7]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* @Gint2, align 4 @@ -1996,12 +1996,12 @@ define i32 @read_global() { ret i32 %l } define i32 @write_read_static_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_read_static_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: ret i32 7 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_read_static_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: ret i32 7 @@ -2011,13 +2011,13 @@ define i32 @write_read_static_global() { ret i32 %l } define void @write_static_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_static_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: store i32 7, i32* @Gstatic_int2, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_static_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, i32* @Gstatic_int2, align 4 @@ -2027,13 +2027,13 @@ define void @write_static_global() { ret void } define i32 @read_static_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@read_static_global ; TUNIT-SAME: () #[[ATTR6]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* @Gstatic_int2, align 4 ; TUNIT-NEXT: ret i32 [[L]] ; -; CGSCC: Function Attrs: nofree norecurse 
nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@read_static_global ; CGSCC-SAME: () #[[ATTR7]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* @Gstatic_int2, align 4 @@ -2043,12 +2043,12 @@ define i32 @read_static_global() { ret i32 %l } define i32 @write_read_static_undef_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_read_static_undef_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: ret i32 7 ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_read_static_undef_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: ret i32 7 @@ -2058,12 +2058,12 @@ define i32 @write_read_static_undef_global() { ret i32 %l } define void @write_static_undef_global() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@write_static_undef_global ; TUNIT-SAME: () #[[ATTR5]] { ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@write_static_undef_global ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: store i32 7, i32* @Gstatic_undef_int2, align 4 @@ -2073,7 +2073,7 @@ define void @write_static_undef_global() { ret void } define i32 @read_static_undef_global() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@read_static_undef_global 
; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret i32 7 @@ -2083,7 +2083,7 @@ define i32 @read_static_undef_global() { } define i32 @single_read_of_static_global() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@single_read_of_static_global ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: ret i32 0 @@ -2223,7 +2223,6 @@ define i8 @phi_no_store_2() { ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; TUNIT-NEXT: store i8 1, i8* [[P]], align 2 ; TUNIT-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2242,7 +2241,6 @@ define i8 @phi_no_store_2() { ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a2 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; CGSCC-NEXT: store i8 1, i8* [[P]], align 2 ; CGSCC-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a2 to i8*), i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2283,7 +2281,6 @@ define i8 @phi_no_store_3() { ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; TUNIT-NEXT: store i8 1, i8* [[P]], align 2 ; TUNIT-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2305,7 +2302,6 @@ define i8 @phi_no_store_3() { ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi i8* [ bitcast (i32* @a3 to i8*), [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: 
[[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; CGSCC-NEXT: store i8 1, i8* [[P]], align 2 ; CGSCC-NEXT: [[G]] = getelementptr i8, i8* bitcast (i32* @a3 to i8*), i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2387,7 +2383,7 @@ define i64 @cast_and_load_2() { define void @recursive_load_store(i64 %N, i32 %v) { ; -; TUNIT: Function Attrs: nofree norecurse nosync nounwind writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(write) ; TUNIT-LABEL: define {{[^@]+}}@recursive_load_store ; TUNIT-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: entry: @@ -2402,7 +2398,7 @@ define void @recursive_load_store(i64 %N, i32 %v) { ; TUNIT: for.end: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree norecurse nosync nounwind writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind memory(write) ; CGSCC-LABEL: define {{[^@]+}}@recursive_load_store ; CGSCC-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR8:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -2861,7 +2857,7 @@ entry: ; Make sure the access %1 is not forwarded to the loads %2 and %3 as the indices are ; varying and the accesses thus not "exact". This used to simplify %cmp12 to true. 
define hidden void @no_propagation_of_unknown_index_access(i32* %in, i32* %out, i32 %idx) #0 { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access ; TUNIT-SAME: (i32* nocapture nofree readonly [[IN:%.*]], i32* nocapture nofree writeonly [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -2904,7 +2900,7 @@ define hidden void @no_propagation_of_unknown_index_access(i32* %in, i32* %out, ; TUNIT-NEXT: [[INC16]] = add nsw i32 [[I3_0]], 1 ; TUNIT-NEXT: br label [[FOR_COND4]], !llvm.loop [[TBAA12]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access ; CGSCC-SAME: (i32* nocapture nofree readonly [[IN:%.*]], i32* nocapture nofree writeonly [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR13:[0-9]+]] { ; CGSCC-NEXT: entry: @@ -2996,7 +2992,7 @@ for.body7: ; preds = %for.cond4 ; Ensure we do not return true. 
define internal i1 @alloca_non_unique(i32* %p, i32 %in, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree nosync nounwind +; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique ; TUNIT-SAME: (i32* nocapture nofree nonnull readonly align 4 [[P:%.*]], i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR12:[0-9]+]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -3010,7 +3006,7 @@ define internal i1 @alloca_non_unique(i32* %p, i32 %in, i1 %c) { ; TUNIT-NEXT: [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]] ; TUNIT-NEXT: ret i1 [[CMP]] ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind +; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique ; CGSCC-SAME: (i32* nocapture nofree nonnull readonly align 4 [[P:%.*]], i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR14:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -3038,13 +3034,13 @@ f: ; Ensure we do not return true. 
define i1 @alloca_non_unique_caller(i32 %in, i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none) ; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique_caller ; TUNIT-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR13:[0-9]+]] { ; TUNIT-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(i32* undef, i32 [[IN]], i1 [[C]]) #[[ATTR20]] ; TUNIT-NEXT: ret i1 [[R]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique_caller ; CGSCC-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR15:[0-9]+]] { ; CGSCC-NEXT: [[R:%.*]] = call i1 @alloca_non_unique(i32* undef, i32 [[IN]], i1 [[C]]) #[[ATTR22]] @@ -3056,22 +3052,22 @@ define i1 @alloca_non_unique_caller(i32 %in, i1 %c) { ; Ensure we do not return %bad or %l, but %sel define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal ; TUNIT-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR4]] { ; TUNIT-NEXT: [[A:%.*]] = alloca i32, align 4 ; TUNIT-NEXT: store i32 [[BAD]], i32* [[A]], align 4 -; TUNIT-NEXT: call void @scope_value_traversal_helper(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR21:[0-9]+]] +; TUNIT-NEXT: call void @scope_value_traversal_helper(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR16]] ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* [[A]], align 4 ; TUNIT-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[BAD]], i32 [[L]] ; TUNIT-NEXT: ret i32 [[SEL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: 
define {{[^@]+}}@scope_value_traversal ; CGSCC-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR16:[0-9]+]] { ; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4 ; CGSCC-NEXT: store i32 [[BAD]], i32* [[A]], align 4 -; CGSCC-NEXT: call void @scope_value_traversal_helper(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR24:[0-9]+]] +; CGSCC-NEXT: call void @scope_value_traversal_helper(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR19]] ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* [[A]], align 4 ; CGSCC-NEXT: [[SEL:%.*]] = select i1 [[C]], i32 [[BAD]], i32 [[L]] ; CGSCC-NEXT: ret i32 [[SEL]] @@ -3085,7 +3081,7 @@ define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) { } define void @scope_value_traversal_helper(i32* %a, i1 %c) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal_helper ; TUNIT-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[L:%.*]] = load i32, i32* [[A]], align 4 @@ -3093,7 +3089,7 @@ define void @scope_value_traversal_helper(i32* %a, i1 %c) { ; TUNIT-NEXT: store i32 [[SEL]], i32* [[A]], align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CGSCC-LABEL: define {{[^@]+}}@scope_value_traversal_helper ; CGSCC-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR13]] { ; CGSCC-NEXT: [[L:%.*]] = load i32, i32* [[A]], align 4 @@ -3143,54 +3139,52 @@ define void @scope_value_traversal_helper(i32* %a, i1 %c) { !30 = distinct !{!30, !17} !31 = distinct !{!31, !17} ;. 
-; TUNIT: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind writeonly } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind memory(write) } ; TUNIT: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } ; TUNIT: attributes #[[ATTR9:[0-9]+]] = { allockind("free") "alloc-family"="malloc" } ; TUNIT: attributes #[[ATTR10:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } ; TUNIT: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind willreturn uwtable } -; TUNIT: attributes #[[ATTR12]] = { argmemonly nofree nosync nounwind } -; TUNIT: attributes #[[ATTR13]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: 
attributes #[[ATTR14:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR13]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR14:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; TUNIT: attributes #[[ATTR15]] = { willreturn } -; TUNIT: attributes #[[ATTR16]] = { nofree nosync nounwind willreturn writeonly } +; TUNIT: attributes #[[ATTR16]] = { nofree nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR17]] = { nocallback } ; TUNIT: attributes #[[ATTR18]] = { norecurse } ; TUNIT: attributes #[[ATTR19]] = { nounwind } ; TUNIT: attributes #[[ATTR20]] = { nofree nosync nounwind } -; TUNIT: attributes #[[ATTR21]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: 
attributes #[[ATTR8]] = { nofree norecurse nosync nounwind writeonly } +; CGSCC: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(read) } +; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind memory(write) } ; CGSCC: attributes #[[ATTR9:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } ; CGSCC: attributes #[[ATTR10:[0-9]+]] = { allockind("free") "alloc-family"="malloc" } ; CGSCC: attributes #[[ATTR11:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } ; CGSCC: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind willreturn uwtable } -; CGSCC: attributes #[[ATTR13]] = { argmemonly nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR14]] = { argmemonly nofree nosync nounwind } -; CGSCC: attributes #[[ATTR15]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR16]] = { nofree nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR17:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR13]] = { nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR14]] = { nofree nosync nounwind memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR15]] = { nofree nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR16]] = { nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR17:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CGSCC: attributes #[[ATTR18]] = { willreturn } -; CGSCC: attributes #[[ATTR19]] = { nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR19]] = { nounwind willreturn } ; CGSCC: attributes #[[ATTR20]] = { nocallback } ; CGSCC: attributes #[[ATTR21]] = { norecurse } ; CGSCC: attributes #[[ATTR22]] = { nounwind } ; CGSCC: attributes #[[ATTR23]] = { 
nofree nosync nounwind } -; CGSCC: attributes #[[ATTR24]] = { nounwind willreturn } ;. ; TUNIT: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1} diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 7c79c4e3d059c..490b5bb7e876c 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -15,7 +15,7 @@ declare i8* @llvm.call.preallocated.arg(token, i32) ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (void (i8***)* @f1 to i8*), i8* bitcast (void (i1 (i8*)*)* @f2 to i8*)] } ;. define internal i32 addrspace(3)* @const_ptr_return_as3() { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@const_ptr_return_as3 ; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: ret i32 addrspace(3)* @ConstAS3Ptr @@ -23,7 +23,7 @@ define internal i32 addrspace(3)* @const_ptr_return_as3() { ret i32 addrspace(3)* @ConstAS3Ptr } define internal i32* @const_ptr_return() { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@const_ptr_return ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i32* addrspacecast (i32 addrspace(3)* @ConstAS3Ptr to i32*) @@ -52,7 +52,7 @@ define void @test1_helper() { ; TEST 2 : Simplify return value define i32 @return0() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return0 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: ret i32 0 @@ -61,7 +61,7 @@ define i32 @return0() { } define i32 @return1() { -; CHECK: 
Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@return1 ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: ret i32 1 @@ -70,7 +70,7 @@ define i32 @return1() { } define i32 @test2_1(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test2_1 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] @@ -83,7 +83,7 @@ define i32 @test2_1(i1 %c) { ; TUNIT-NEXT: [[RET:%.*]] = phi i32 [ [[RET0]], [[IF_TRUE]] ], [ 1, [[IF_FALSE]] ] ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test2_1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { ; CGSCC-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] @@ -116,12 +116,12 @@ end: define i32 @test2_2(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test2_2 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i32 1 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test2_2 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[RET:%.*]] = tail call noundef i32 @test2_1(i1 [[C]]) #[[ATTR12]] @@ -229,7 +229,7 @@ end: } define i32 @ipccp1(i32 %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ipccp1 ; CHECK-SAME: 
(i32 returned [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: br i1 true, label [[T:%.*]], label [[F:%.*]] @@ -247,7 +247,7 @@ f: } define internal i1 @ipccp2i(i1 %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp2i ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: br label [[T:%.*]] @@ -265,12 +265,12 @@ f: } define i1 @ipccp2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ipccp2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp2 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = call noundef i1 @ipccp2i() #[[ATTR12]] @@ -281,7 +281,7 @@ define i1 @ipccp2() { } define internal i1 @ipccp2ib(i1 %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp2ib ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: br label [[T:%.*]] @@ -299,12 +299,12 @@ f: } define i1 @ipccp2b() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ipccp2b ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp2b ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = call noundef i1 @ipccp2ib() #[[ATTR12]] @@ -315,7 +315,7 @@ define i1 @ipccp2b() { } define 
internal i32 @ipccp3i(i32 %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp3i ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: br label [[T:%.*]] @@ -334,12 +334,12 @@ f: } define i32 @ipccp3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ipccp3 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i32 7 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp3 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[R:%.*]] = call noundef i32 @ipccp3i() #[[ATTR12]] @@ -350,7 +350,7 @@ define i32 @ipccp3() { } define internal i32 @ipccp4ia(i1 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp4ia ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -366,7 +366,7 @@ f: ret i32 1 } define internal i32 @ipccp4ib(i32 %a) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp4ib ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: br label [[T:%.*]] @@ -386,7 +386,7 @@ f: } define i32 @ipccp4(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@ipccp4 ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -395,7 +395,7 @@ define i32 @ipccp4(i1 %c) { ; 
TUNIT: f: ; TUNIT-NEXT: ret i32 0 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@ipccp4 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] @@ -417,7 +417,7 @@ f: ; Do not touch complicated arguments (for now) %struct.X = type { i8* } define internal i32* @test_inalloca(i32* inalloca(i32) %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test_inalloca ; CHECK-SAME: (i32* noalias nofree nonnull returned writeonly inalloca(i32) dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret i32* [[A]] @@ -425,13 +425,13 @@ define internal i32* @test_inalloca(i32* inalloca(i32) %a) { ret i32* %a } define i32* @complicated_args_inalloca(i32* %arg) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@complicated_args_inalloca ; TUNIT-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: [[CALL:%.*]] = call nonnull dereferenceable(4) i32* @test_inalloca(i32* noalias nofree writeonly inalloca(i32) "no-capture-maybe-returned" [[ARG]]) #[[ATTR9:[0-9]+]] ; TUNIT-NEXT: ret i32* [[CALL]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_inalloca ; CGSCC-SAME: (i32* nofree noundef nonnull readnone dereferenceable(4) [[ARG:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CALL:%.*]] = call noalias nonnull dereferenceable(4) i32* @test_inalloca(i32* noalias nofree noundef nonnull writeonly inalloca(i32) dereferenceable(4) 
[[ARG]]) #[[ATTR12]] @@ -442,7 +442,7 @@ define i32* @complicated_args_inalloca(i32* %arg) { } define internal i32* @test_preallocated(i32* preallocated(i32) %a) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test_preallocated ; CHECK-SAME: (i32* noalias nofree noundef nonnull returned writeonly preallocated(i32) align 4294967296 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: ret i32* [[A]] @@ -460,7 +460,8 @@ define i32* @complicated_args_preallocated() { ; CGSCC: Function Attrs: nofree nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_preallocated ; CGSCC-SAME: () #[[ATTR3:[0-9]+]] { -; CGSCC-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR13:[0-9]+]] +; CGSCC-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR12]] +; CGSCC-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 4294967296 null) #[[ATTR13:[0-9]+]] [ "preallocated"(token [[C]]) ] ; CGSCC-NEXT: ret i32* null ; %c = call token @llvm.call.preallocated.setup(i32 1) @@ -470,13 +471,13 @@ define i32* @complicated_args_preallocated() { define internal void @test_sret(%struct.X* sret(%struct.X) %a, %struct.X** %b) { ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test_sret ; TUNIT-SAME: (%struct.X* noalias nofree noundef nonnull writeonly sret([[STRUCT_X:%.*]]) align 4294967296 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR3:[0-9]+]] { ; TUNIT-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8 ; TUNIT-NEXT: ret void 
; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@test_sret ; CGSCC-SAME: (%struct.X* noalias nofree noundef nonnull writeonly sret([[STRUCT_X:%.*]]) align 4294967296 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR4:[0-9]+]] { ; CGSCC-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8 @@ -489,13 +490,13 @@ define internal void @test_sret(%struct.X* sret(%struct.X) %a, %struct.X** %b) { define void @complicated_args_sret(%struct.X** %b) { ; ; -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@complicated_args_sret ; TUNIT-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR3]] { -; TUNIT-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 4294967296 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR11:[0-9]+]] +; TUNIT-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 4294967296 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR9]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_sret ; CGSCC-SAME: (%struct.X** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR5:[0-9]+]] { ; CGSCC-NEXT: unreachable @@ -505,7 +506,7 @@ define void @complicated_args_sret(%struct.X** %b) { } define internal %struct.X* @test_nest(%struct.X* nest %a) { -; CGSCC: Function Attrs: nofree 
norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_nest ; CGSCC-SAME: (%struct.X* nest noalias nocapture nofree readnone align 4294967296 [[A:%.*]]) #[[ATTR1]] { ; CGSCC-NEXT: ret %struct.X* null @@ -513,12 +514,12 @@ define internal %struct.X* @test_nest(%struct.X* nest %a) { ret %struct.X* %a } define %struct.X* @complicated_args_nest() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@complicated_args_nest ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret %struct.X* null ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_nest ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[CALL:%.*]] = call noalias noundef align 4294967296 %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 4294967296 null) #[[ATTR12]] @@ -530,7 +531,7 @@ define %struct.X* @complicated_args_nest() { @S = external global %struct.X define internal void @test_byval(%struct.X* byval(%struct.X) %a) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@test_byval ; TUNIT-SAME: (i8* [[TMP0:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]], align 8 @@ -540,7 +541,7 @@ define internal void @test_byval(%struct.X* byval(%struct.X) %a) { ; TUNIT-NEXT: store i8* null, i8** [[G0]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; 
CGSCC-LABEL: define {{[^@]+}}@test_byval ; CGSCC-SAME: (i8* [[TMP0:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]], align 8 @@ -555,19 +556,19 @@ define internal void @test_byval(%struct.X* byval(%struct.X) %a) { ret void } define void @complicated_args_byval() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@complicated_args_byval ; TUNIT-SAME: () #[[ATTR4:[0-9]+]] { ; TUNIT-NEXT: [[S_CAST:%.*]] = bitcast %struct.X* @S to i8** ; TUNIT-NEXT: [[TMP1:%.*]] = load i8*, i8** [[S_CAST]], align 8 -; TUNIT-NEXT: call void @test_byval(i8* [[TMP1]]) #[[ATTR11]] +; TUNIT-NEXT: call void @test_byval(i8* [[TMP1]]) #[[ATTR9]] ; TUNIT-NEXT: ret void ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@complicated_args_byval ; CGSCC-SAME: () #[[ATTR3]] { ; CGSCC-NEXT: [[TMP1:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT_X:%.*]], %struct.X* @S, i32 0, i32 0), align 8 -; CGSCC-NEXT: call void @test_byval(i8* nofree writeonly [[TMP1]]) #[[ATTR14:[0-9]+]] +; CGSCC-NEXT: call void @test_byval(i8* nofree writeonly [[TMP1]]) #[[ATTR13]] ; CGSCC-NEXT: ret void ; call void @test_byval(%struct.X* byval(%struct.X) @S) @@ -610,7 +611,7 @@ define i8* @complicated_args_byval2() { } define void @fixpoint_changed(i32* %p) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@fixpoint_changed ; TUNIT-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR3]] { ; TUNIT-NEXT: entry: @@ -633,7 +634,7 @@ define void @fixpoint_changed(i32* %p) { ; TUNIT: for.end: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree 
norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@fixpoint_changed ; CGSCC-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: entry: @@ -684,12 +685,12 @@ for.end: ; Check we merge undef and a constant properly. define i8 @caller0() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller0 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller0 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -699,12 +700,12 @@ define i8 @caller0() { ret i8 %c } define i8 @caller1() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller1 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller1 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -714,12 +715,12 @@ define i8 @caller1() { ret i8 %c } define i8 @caller2() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller2 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller2 ; CGSCC-SAME: () #[[ATTR2]] { 
; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -729,12 +730,12 @@ define i8 @caller2() { ret i8 %c } define i8 @caller_middle() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller_middle ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller_middle ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -744,12 +745,12 @@ define i8 @caller_middle() { ret i8 %c } define i8 @caller3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller3 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller3 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -759,12 +760,12 @@ define i8 @caller3() { ret i8 %c } define i8 @caller4() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@caller4 ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i8 49 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@caller4 ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i8 @callee() #[[ATTR12]] @@ -774,7 +775,7 @@ define i8 @caller4() { ret i8 %c } define 
internal i8 @callee(i8 %a) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@callee ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i8 49 @@ -784,13 +785,13 @@ define internal i8 @callee(i8 %a) { } define void @user_as3() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@user_as3 ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: store i32 0, i32 addrspace(3)* @ConstAS3Ptr, align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@user_as3 ; CGSCC-SAME: () #[[ATTR6:[0-9]+]] { ; CGSCC-NEXT: [[CALL:%.*]] = call fastcc align 4 i32 addrspace(3)* @const_ptr_return_as3() #[[ATTR12]] @@ -802,13 +803,13 @@ define void @user_as3() { ret void } define void @user() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(write) ; TUNIT-LABEL: define {{[^@]+}}@user ; TUNIT-SAME: () #[[ATTR4]] { ; TUNIT-NEXT: store i32 0, i32* addrspacecast (i32 addrspace(3)* @ConstAS3Ptr to i32*), align 4 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(write) ; CGSCC-LABEL: define {{[^@]+}}@user ; CGSCC-SAME: () #[[ATTR6]] { ; CGSCC-NEXT: [[CALL:%.*]] = call fastcc align 4 i32* @const_ptr_return() #[[ATTR12]] @@ -822,12 +823,12 @@ define void @user() { define i1 @test_merge_with_undef_values_ptr(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn 
memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_merge_with_undef_values_ptr ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_merge_with_undef_values_ptr ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R1:%.*]] = call noundef i1 @undef_then_null(i1 [[C]]) #[[ATTR12]] @@ -837,7 +838,7 @@ define i1 @test_merge_with_undef_values_ptr(i1 %c) { ret i1 %r1 } define internal i1 @undef_then_null(i1 %c, i32* %i32Aptr, i32* %i32Bptr) { -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_then_null ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] @@ -858,12 +859,12 @@ b: } define i1 @test_merge_with_undef_values(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_merge_with_undef_values ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i1 false ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_merge_with_undef_values ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[R1:%.*]] = call noundef i1 @undef_then_1(i1 [[C]]) #[[ATTR12]] @@ -874,7 +875,7 @@ define i1 @test_merge_with_undef_values(i1 %c) { } define internal i1 @undef_then_1(i1 %c, i32 %i32A, i32 %i32B) { ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@undef_then_1 ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; 
CGSCC-NEXT: br i1 [[C]], label [[A:%.*]], label [[B:%.*]] @@ -895,12 +896,12 @@ b: } define i32 @test_select(i32 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_select ; TUNIT-SAME: (i32 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: ret i32 42 ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_select ; CGSCC-SAME: (i32 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: [[CALL:%.*]] = call noundef i32 @select() #[[ATTR12]] @@ -911,7 +912,7 @@ define i32 @test_select(i32 %c) { } define internal i32 @select(i1 %a, i32 %b, i32 %c) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@select ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i32 42 @@ -921,7 +922,7 @@ define internal i32 @select(i1 %a, i32 %b, i32 %c) { } define i1 @icmp() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@icmp ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: ret i1 true @@ -973,14 +974,14 @@ define internal void @unknown_calle_arg_is_undef(void (i32)* %fn, i32 %arg) { @g = internal constant { [2 x i8*] } { [2 x i8*] [i8* bitcast (void (i8***)* @f1 to i8*), i8* bitcast (void (i1 (i8*)*)* @f2 to i8*)] } define internal void @f1(i8*** %a) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; TUNIT-LABEL: define {{[^@]+}}@f1 ; TUNIT-SAME: (i8*** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) 
#[[ATTR3]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: store i8** getelementptr inbounds ({ [2 x i8*] }, { [2 x i8*] }* @g, i32 0, i32 0, i32 0), i8*** [[A]], align 8 ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CGSCC-LABEL: define {{[^@]+}}@f1 ; CGSCC-SAME: (i8*** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR4]] { ; CGSCC-NEXT: entry: @@ -1044,12 +1045,12 @@ entry: define i1 @test_cmp_null_after_cast() { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_cmp_null_after_cast ; TUNIT-SAME: () #[[ATTR1]] { ; TUNIT-NEXT: ret i1 true ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_cmp_null_after_cast ; CGSCC-SAME: () #[[ATTR2]] { ; CGSCC-NEXT: [[C:%.*]] = call noundef i1 @cmp_null_after_cast() #[[ATTR12]] @@ -1059,7 +1060,7 @@ define i1 @test_cmp_null_after_cast() { ret i1 %c } define internal i1 @cmp_null_after_cast(i32 %a, i8 %b) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@cmp_null_after_cast ; CGSCC-SAME: () #[[ATTR1]] { ; CGSCC-NEXT: ret i1 true @@ -1145,7 +1146,7 @@ join: } define i1 @test_liveness(i1 %c) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_liveness ; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { ; TUNIT-NEXT: entry: @@ -1157,7 +1158,7 @@ define i1 @test_liveness(i1 %c) { ; TUNIT-NEXT: 
[[RC1:%.*]] = call i1 @ret(i1 noundef [[P]]) #[[ATTR9]] ; TUNIT-NEXT: ret i1 [[RC1]] ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone willreturn +; CGSCC: Function Attrs: nofree nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_liveness ; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR2]] { ; CGSCC-NEXT: entry: @@ -1180,7 +1181,7 @@ f: } define internal i1 @ret(i1 %c) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@ret ; CHECK-SAME: (i1 noundef [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -1228,7 +1229,7 @@ define internal i8 @memcpy_uses_store(i8 %arg) { ; TUNIT-NEXT: [[SRC:%.*]] = alloca i8, align 1 ; TUNIT-NEXT: [[DST:%.*]] = alloca i8, align 1 ; TUNIT-NEXT: store i8 [[ARG]], i8* [[SRC]], align 1 -; TUNIT-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[DST]], i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[SRC]], i32 noundef 1, i1 noundef false) #[[ATTR10]] +; TUNIT-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[DST]], i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[SRC]], i32 noundef 1, i1 noundef false) #[[ATTR11:[0-9]+]] ; TUNIT-NEXT: [[L:%.*]] = load i8, i8* [[DST]], align 1 ; TUNIT-NEXT: ret i8 [[L]] ; @@ -1238,7 +1239,7 @@ define internal i8 @memcpy_uses_store(i8 %arg) { ; CGSCC-NEXT: [[SRC:%.*]] = alloca i8, align 1 ; CGSCC-NEXT: [[DST:%.*]] = alloca i8, align 1 ; CGSCC-NEXT: store i8 [[ARG]], i8* [[SRC]], align 1 -; CGSCC-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[DST]], i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[SRC]], i32 noundef 1, i1 noundef false) #[[ATTR13]] +; CGSCC-NEXT: 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[DST]], i8* noalias nocapture nofree noundef nonnull readonly dereferenceable(1) [[SRC]], i32 noundef 1, i1 noundef false) #[[ATTR14:[0-9]+]] ; CGSCC-NEXT: [[L:%.*]] = load i8, i8* [[DST]], align 1 ; CGSCC-NEXT: ret i8 [[L]] ; @@ -1254,13 +1255,13 @@ define i8 @memcpy_uses_store_caller(i8 %arg) { ; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn ; TUNIT-LABEL: define {{[^@]+}}@memcpy_uses_store_caller ; TUNIT-SAME: (i8 [[ARG:%.*]]) #[[ATTR2]] { -; TUNIT-NEXT: [[R:%.*]] = call i8 @memcpy_uses_store(i8 [[ARG]]) #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: [[R:%.*]] = call i8 @memcpy_uses_store(i8 [[ARG]]) #[[ATTR9]] ; TUNIT-NEXT: ret i8 [[R]] ; ; CGSCC: Function Attrs: nofree nosync nounwind willreturn ; CGSCC-LABEL: define {{[^@]+}}@memcpy_uses_store_caller ; CGSCC-SAME: (i8 [[ARG:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[R:%.*]] = call i8 @memcpy_uses_store(i8 [[ARG]]) #[[ATTR15:[0-9]+]] +; CGSCC-NEXT: [[R:%.*]] = call i8 @memcpy_uses_store(i8 [[ARG]]) #[[ATTR13]] ; CGSCC-NEXT: ret i8 [[R]] ; %r = call i8 @memcpy_uses_store(i8 %arg) @@ -1271,7 +1272,7 @@ define i8 @memcpy_uses_store_caller(i8 %arg) { declare i32 @speculatable() speculatable readnone define i32 @test_speculatable_expr() norecurse { -; TUNIT: Function Attrs: norecurse nosync readnone +; TUNIT: Function Attrs: norecurse nosync memory(none) ; TUNIT-LABEL: define {{[^@]+}}@test_speculatable_expr ; TUNIT-SAME: () #[[ATTR6:[0-9]+]] { ; TUNIT-NEXT: [[STACK:%.*]] = alloca i32, align 4 @@ -1279,10 +1280,10 @@ define i32 @test_speculatable_expr() norecurse { ; TUNIT-NEXT: [[PLUS1:%.*]] = add i32 [[SPEC_RESULT]], 1 ; TUNIT-NEXT: store i32 [[PLUS1]], i32* [[STACK]], align 4 ; TUNIT-NEXT: [[TMP1:%.*]] = load i32, i32* [[STACK]], align 4 -; TUNIT-NEXT: [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32 [[TMP1]]) #[[ATTR13:[0-9]+]] +; TUNIT-NEXT: [[RSPEC:%.*]] = call i32 @ret_speculatable_expr(i32 
[[TMP1]]) #[[ATTR12:[0-9]+]] ; TUNIT-NEXT: ret i32 [[RSPEC]] ; -; CGSCC: Function Attrs: norecurse nosync readnone +; CGSCC: Function Attrs: norecurse nosync memory(none) ; CGSCC-LABEL: define {{[^@]+}}@test_speculatable_expr ; CGSCC-SAME: () #[[ATTR9:[0-9]+]] { ; CGSCC-NEXT: [[STACK:%.*]] = alloca i32, align 4 @@ -1301,7 +1302,7 @@ define i32 @test_speculatable_expr() norecurse { } define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) { -; TUNIT: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; TUNIT-LABEL: define {{[^@]+}}@ret_speculatable_expr ; TUNIT-SAME: (i32 [[TMP0:%.*]]) #[[ATTR7:[0-9]+]] { ; TUNIT-NEXT: [[MEM_PRIV:%.*]] = alloca i32, align 4 @@ -1311,7 +1312,7 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) { ; TUNIT-NEXT: [[ADD:%.*]] = add i32 [[MUL]], 7 ; TUNIT-NEXT: ret i32 [[ADD]] ; -; CGSCC: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CGSCC-LABEL: define {{[^@]+}}@ret_speculatable_expr ; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR10:[0-9]+]] { ; CGSCC-NEXT: [[MEM_PRIV:%.*]] = alloca i32, align 4 @@ -1330,34 +1331,32 @@ define internal i32 @ret_speculatable_expr(i32* %mem, i32 %a2) { ;. 
; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR3]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR5:[0-9]+]] = { readnone speculatable } -; TUNIT: attributes #[[ATTR6]] = { norecurse nosync readnone } -; TUNIT: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; TUNIT: attributes #[[ATTR8:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } -; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn } +; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; TUNIT: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(write) } +; TUNIT: attributes #[[ATTR5:[0-9]+]] = { speculatable memory(none) } +; TUNIT: attributes #[[ATTR6]] = { norecurse nosync memory(none) } +; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR9]] = { nofree nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR10]] = { willreturn } -; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn writeonly } -; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind willreturn } -; TUNIT: attributes #[[ATTR13]] = { nosync nounwind readonly } +; TUNIT: attributes #[[ATTR11]] = { willreturn memory(readwrite) } +; TUNIT: attributes #[[ATTR12]] = { nosync nounwind } ;. 
; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn } +; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn memory(write) } ; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn } -; CGSCC: attributes #[[ATTR8:[0-9]+]] = { readnone speculatable } -; CGSCC: attributes #[[ATTR9]] = { norecurse nosync readnone } -; CGSCC: attributes #[[ATTR10]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } -; CGSCC: attributes #[[ATTR11:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } -; CGSCC: attributes #[[ATTR12]] = { readnone willreturn } -; CGSCC: attributes #[[ATTR13]] = { willreturn } -; CGSCC: attributes #[[ATTR14]] = { nounwind willreturn writeonly } -; CGSCC: attributes #[[ATTR15]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR8:[0-9]+]] = { speculatable memory(none) } +; CGSCC: attributes #[[ATTR9]] = { norecurse nosync memory(none) } +; CGSCC: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes 
#[[ATTR11:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR12]] = { willreturn } +; CGSCC: attributes #[[ATTR13]] = { nounwind willreturn } +; CGSCC: attributes #[[ATTR14]] = { willreturn memory(readwrite) } ;. diff --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll index c448587423b19..76675f9c8ab68 100644 --- a/llvm/test/Transforms/Attributor/willreturn.ll +++ b/llvm/test/Transforms/Attributor/willreturn.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:e-i54:64-f80:128-n8:16:32:64-S128" ; TEST 1 (positive case) define void @only_return() #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@only_return ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret void @@ -28,7 +28,7 @@ define void @only_return() #0 { ; FIXME: missing willreturn define i32 @fib(i32 %0) local_unnamed_addr #0 { -; TUNIT: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; TUNIT: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; TUNIT-LABEL: define {{[^@]+}}@fib ; TUNIT-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; TUNIT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP0]], 2 @@ -43,16 +43,16 @@ define i32 @fib(i32 %0) local_unnamed_addr #0 { ; TUNIT: 9: ; TUNIT-NEXT: ret i32 [[TMP0]] ; -; CGSCC: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; CGSCC: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; CGSCC-LABEL: define {{[^@]+}}@fib ; CGSCC-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { ; CGSCC-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP0]], 2 ; CGSCC-NEXT: br i1 [[TMP2]], label [[TMP9:%.*]], label [[TMP3:%.*]] ; CGSCC: 3: ; CGSCC-NEXT: [[TMP4:%.*]] = add nsw i32 [[TMP0]], -1 -; CGSCC-NEXT: [[TMP5:%.*]] = tail 
call i32 @fib(i32 [[TMP4]]) #[[ATTR19:[0-9]+]] +; CGSCC-NEXT: [[TMP5:%.*]] = tail call i32 @fib(i32 [[TMP4]]) #[[ATTR27:[0-9]+]] ; CGSCC-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0]], -2 -; CGSCC-NEXT: [[TMP7:%.*]] = tail call i32 @fib(i32 [[TMP6]]) #[[ATTR19]] +; CGSCC-NEXT: [[TMP7:%.*]] = tail call i32 @fib(i32 [[TMP6]]) #[[ATTR27]] ; CGSCC-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP7]], [[TMP5]] ; CGSCC-NEXT: ret i32 [[TMP8]] ; CGSCC: 9: @@ -84,7 +84,7 @@ define i32 @fib(i32 %0) local_unnamed_addr #0 { ; fact_maybe_not(-1) doesn't stop. define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@fact_maybe_not_halt ; CHECK-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -132,7 +132,7 @@ define i32 @fact_maybe_not_halt(i32 %0) local_unnamed_addr #0 { ; } define i32 @fact_loop(i32 %0) local_unnamed_addr #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@fact_loop ; CHECK-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -176,16 +176,27 @@ define i32 @fact_loop(i32 %0) local_unnamed_addr #0 { declare void @sink() nounwind willreturn nosync nofree define void @mutual_recursion1(i1 %c) #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind uwtable -; CHECK-LABEL: define {{[^@]+}}@mutual_recursion1 -; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] { -; CHECK-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] -; CHECK: rec: -; CHECK-NEXT: call void @sink() #[[ATTR12:[0-9]+]] -; CHECK-NEXT: call void @mutual_recursion2(i1 noundef [[C]]) 
#[[ATTR27:[0-9]+]] -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nofree noinline nosync nounwind uwtable +; TUNIT-LABEL: define {{[^@]+}}@mutual_recursion1 +; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] { +; TUNIT-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] +; TUNIT: rec: +; TUNIT-NEXT: call void @sink() #[[ATTR12:[0-9]+]] +; TUNIT-NEXT: call void @mutual_recursion2(i1 noundef [[C]]) #[[ATTR26]] +; TUNIT-NEXT: br label [[END]] +; TUNIT: end: +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree noinline nosync nounwind uwtable +; CGSCC-LABEL: define {{[^@]+}}@mutual_recursion1 +; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4:[0-9]+]] { +; CGSCC-NEXT: br i1 [[C]], label [[REC:%.*]], label [[END:%.*]] +; CGSCC: rec: +; CGSCC-NEXT: call void @sink() #[[ATTR12:[0-9]+]] +; CGSCC-NEXT: call void @mutual_recursion2(i1 noundef [[C]]) #[[ATTR27]] +; CGSCC-NEXT: br label [[END]] +; CGSCC: end: +; CGSCC-NEXT: ret void ; br i1 %c, label %rec, label %end rec: @@ -198,11 +209,17 @@ end: define void @mutual_recursion2(i1 %c) #0 { -; CHECK: Function Attrs: nofree noinline nosync nounwind uwtable -; CHECK-LABEL: define {{[^@]+}}@mutual_recursion2 -; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { -; CHECK-NEXT: call void @mutual_recursion1(i1 [[C]]) #[[ATTR27]] -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nofree noinline nosync nounwind uwtable +; TUNIT-LABEL: define {{[^@]+}}@mutual_recursion2 +; TUNIT-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { +; TUNIT-NEXT: call void @mutual_recursion1(i1 [[C]]) #[[ATTR26]] +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nofree noinline nosync nounwind uwtable +; CGSCC-LABEL: define {{[^@]+}}@mutual_recursion2 +; CGSCC-SAME: (i1 [[C:%.*]]) #[[ATTR4]] { +; CGSCC-NEXT: call void @mutual_recursion1(i1 [[C]]) #[[ATTR27]] +; CGSCC-NEXT: ret void ; call void @mutual_recursion1(i1 %c) ret void @@ -277,12 +294,12 @@ define void @conditional_exit(i32 %0, i32* nocapture readonly %1) local_unnamed_ ; 
TEST 6 (positive case) ; Call intrinsic function -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; CHECK-NEXT: declare float @llvm.floor.f32(float) declare float @llvm.floor.f32(float) define void @call_floor(float %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@call_floor ; CHECK-SAME: (float [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: ret void @@ -292,11 +309,17 @@ define void @call_floor(float %a) #0 { } define float @call_floor2(float %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable -; CHECK-LABEL: define {{[^@]+}}@call_floor2 -; CHECK-SAME: (float [[A:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[C:%.*]] = tail call float @llvm.floor.f32(float [[A]]) #[[ATTR28:[0-9]+]] -; CHECK-NEXT: ret float [[C]] +; TUNIT: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable +; TUNIT-LABEL: define {{[^@]+}}@call_floor2 +; TUNIT-SAME: (float [[A:%.*]]) #[[ATTR0]] { +; TUNIT-NEXT: [[C:%.*]] = tail call float @llvm.floor.f32(float [[A]]) #[[ATTR27:[0-9]+]] +; TUNIT-NEXT: ret float [[C]] +; +; CGSCC: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable +; CGSCC-LABEL: define {{[^@]+}}@call_floor2 +; CGSCC-SAME: (float [[A:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT: [[C:%.*]] = tail call float @llvm.floor.f32(float [[A]]) #[[ATTR28:[0-9]+]] +; CGSCC-NEXT: ret float [[C]] ; %c = tail call float @llvm.floor.f32(float %a) ret float %c @@ -312,11 +335,17 @@ define float @call_floor2(float %a) #0 { declare void @maybe_noreturn() #0 define void @call_maybe_noreturn() #0 { -; CHECK: Function Attrs: noinline nounwind uwtable -; CHECK-LABEL: define 
{{[^@]+}}@call_maybe_noreturn -; CHECK-SAME: () #[[ATTR7]] { -; CHECK-NEXT: tail call void @maybe_noreturn() #[[ATTR29:[0-9]+]] -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: noinline nounwind uwtable +; TUNIT-LABEL: define {{[^@]+}}@call_maybe_noreturn +; TUNIT-SAME: () #[[ATTR7]] { +; TUNIT-NEXT: tail call void @maybe_noreturn() #[[ATTR28:[0-9]+]] +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: noinline nounwind uwtable +; CGSCC-LABEL: define {{[^@]+}}@call_maybe_noreturn +; CGSCC-SAME: () #[[ATTR7]] { +; CGSCC-NEXT: tail call void @maybe_noreturn() #[[ATTR29:[0-9]+]] +; CGSCC-NEXT: ret void ; tail call void @maybe_noreturn() ret void @@ -331,11 +360,17 @@ define void @call_maybe_noreturn() #0 { declare void @will_return() willreturn norecurse define void @f1() #0 { -; CHECK: Function Attrs: noinline nounwind willreturn uwtable -; CHECK-LABEL: define {{[^@]+}}@f1 -; CHECK-SAME: () #[[ATTR10:[0-9]+]] { -; CHECK-NEXT: tail call void @will_return() #[[ATTR30:[0-9]+]] -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: noinline nounwind willreturn uwtable +; TUNIT-LABEL: define {{[^@]+}}@f1 +; TUNIT-SAME: () #[[ATTR10:[0-9]+]] { +; TUNIT-NEXT: tail call void @will_return() #[[ATTR27]] +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: noinline nounwind willreturn uwtable +; CGSCC-LABEL: define {{[^@]+}}@f1 +; CGSCC-SAME: () #[[ATTR10:[0-9]+]] { +; CGSCC-NEXT: tail call void @will_return() #[[ATTR28]] +; CGSCC-NEXT: ret void ; tail call void @will_return() ret void @@ -344,8 +379,8 @@ define void @f1() #0 { define void @f2() #0 { ; CHECK: Function Attrs: noinline nounwind willreturn uwtable ; CHECK-LABEL: define {{[^@]+}}@f2 -; CHECK-SAME: () #[[ATTR10]] { -; CHECK-NEXT: tail call void @f1() #[[ATTR12]] +; CHECK-SAME: () #[[ATTR10:[0-9]+]] { +; CHECK-NEXT: tail call void @f1() #[[ATTR12:[0-9]+]] ; CHECK-NEXT: ret void ; tail call void @f1() @@ -384,17 +419,29 @@ label2: declare i1 @maybe_raise_exception() #1 willreturn define void @invoke_test() personality 
i32 (...)* @__gxx_personality_v0 { -; CHECK: Function Attrs: nounwind willreturn -; CHECK-LABEL: define {{[^@]+}}@invoke_test -; CHECK-SAME: () #[[ATTR12]] personality i32 (...)* @__gxx_personality_v0 { -; CHECK-NEXT: [[TMP1:%.*]] = invoke i1 @maybe_raise_exception() #[[ATTR30]] -; CHECK-NEXT: to label [[N:%.*]] unwind label [[F:%.*]] -; CHECK: N: -; CHECK-NEXT: ret void -; CHECK: F: -; CHECK-NEXT: [[VAL:%.*]] = landingpad { i8*, i32 } -; CHECK-NEXT: catch i8* null -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: nounwind willreturn +; TUNIT-LABEL: define {{[^@]+}}@invoke_test +; TUNIT-SAME: () #[[ATTR12]] personality i32 (...)* @__gxx_personality_v0 { +; TUNIT-NEXT: [[TMP1:%.*]] = invoke i1 @maybe_raise_exception() #[[ATTR27]] +; TUNIT-NEXT: to label [[N:%.*]] unwind label [[F:%.*]] +; TUNIT: N: +; TUNIT-NEXT: ret void +; TUNIT: F: +; TUNIT-NEXT: [[VAL:%.*]] = landingpad { i8*, i32 } +; TUNIT-NEXT: catch i8* null +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: nounwind willreturn +; CGSCC-LABEL: define {{[^@]+}}@invoke_test +; CGSCC-SAME: () #[[ATTR12]] personality i32 (...)* @__gxx_personality_v0 { +; CGSCC-NEXT: [[TMP1:%.*]] = invoke i1 @maybe_raise_exception() #[[ATTR28]] +; CGSCC-NEXT: to label [[N:%.*]] unwind label [[F:%.*]] +; CGSCC: N: +; CGSCC-NEXT: ret void +; CGSCC: F: +; CGSCC-NEXT: [[VAL:%.*]] = landingpad { i8*, i32 } +; CGSCC-NEXT: catch i8* null +; CGSCC-NEXT: ret void ; invoke i1 @maybe_raise_exception() to label %N unwind label %F @@ -420,7 +467,7 @@ declare i32 @__gxx_personality_v0(...) 
; } define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CHECK-LABEL: define {{[^@]+}}@loop_constant_trip_count ; CHECK-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[TMP0:%.*]]) #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: br label [[TMP3:%.*]] @@ -464,7 +511,7 @@ define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 { ; return ans; ; } define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, i32 %3) local_unnamed_addr #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind memory(argmem: read) uwtable ; CHECK-LABEL: define {{[^@]+}}@loop_trip_count_unbound ; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32* nocapture nofree readonly [[TMP2:%.*]], i32 [[TMP3:%.*]]) local_unnamed_addr #[[ATTR14:[0-9]+]] { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] @@ -515,7 +562,7 @@ define i32 @loop_trip_count_unbound(i32 %0, i32 %1, i32* nocapture readonly %2, define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) local_unnamed_addr #0 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CHECK-LABEL: define {{[^@]+}}@loop_trip_dec ; CHECK-SAME: (i32 [[TMP0:%.*]], i32* nocapture nofree readonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR13]] { ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], -1 @@ -562,7 +609,7 @@ define i32 @loop_trip_dec(i32 %0, i32* nocapture readonly %1) local_unnamed_addr ; multiple return define i32 @multiple_return(i32 %a) #0 { -; CHECK: Function Attrs: nofree noinline norecurse 
nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@multiple_return ; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[A]], 0 @@ -586,13 +633,21 @@ f: ; 15.1 (positive case) define void @unreachable_exit_positive1() #0 { -; CHECK: Function Attrs: noinline nounwind willreturn uwtable -; CHECK-LABEL: define {{[^@]+}}@unreachable_exit_positive1 -; CHECK-SAME: () #[[ATTR10]] { -; CHECK-NEXT: tail call void @will_return() #[[ATTR30]] -; CHECK-NEXT: ret void -; CHECK: unreachable_label: -; CHECK-NEXT: unreachable +; TUNIT: Function Attrs: noinline nounwind willreturn uwtable +; TUNIT-LABEL: define {{[^@]+}}@unreachable_exit_positive1 +; TUNIT-SAME: () #[[ATTR10]] { +; TUNIT-NEXT: tail call void @will_return() #[[ATTR27]] +; TUNIT-NEXT: ret void +; TUNIT: unreachable_label: +; TUNIT-NEXT: unreachable +; +; CGSCC: Function Attrs: noinline nounwind willreturn uwtable +; CGSCC-LABEL: define {{[^@]+}}@unreachable_exit_positive1 +; CGSCC-SAME: () #[[ATTR10]] { +; CGSCC-NEXT: tail call void @will_return() #[[ATTR28]] +; CGSCC-NEXT: ret void +; CGSCC: unreachable_label: +; CGSCC-NEXT: unreachable ; tail call void @will_return() ret void @@ -603,7 +658,7 @@ unreachable_label: } define i32 @unreachable_exit_positive2(i32) local_unnamed_addr #0 { -; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@unreachable_exit_positive2 ; CHECK-SAME: (i32 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP0]], 1 @@ -662,7 +717,7 @@ unreachable_label: } define void @unreachable_exit_negative2() #0 { -; CHECK: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable +; CHECK: Function Attrs: nofree 
noinline norecurse noreturn nosync nounwind memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@unreachable_exit_negative2 ; CHECK-SAME: () #[[ATTR15:[0-9]+]] { ; CHECK-NEXT: br label [[L1:%.*]] @@ -711,7 +766,7 @@ define void @call_longjmp(i8* nocapture readnone %0) local_unnamed_addr #0 { ; } define i32 @infinite_loop_inside_bounded_loop(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@infinite_loop_inside_bounded_loop ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR17:[0-9]+]] { ; CHECK-NEXT: entry: @@ -772,7 +827,7 @@ for.end: ; preds = %for.cond.cleanup ; } define i32 @bounded_nested_loops(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@bounded_nested_loops ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR18:[0-9]+]] { ; CHECK-NEXT: entry: @@ -849,7 +904,7 @@ for.end: ; preds = %for.cond.cleanup ; } define i32 @bounded_loop_inside_unbounded_loop(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@bounded_loop_inside_unbounded_loop ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR17]] { ; CHECK-NEXT: entry: @@ -933,7 +988,7 @@ while.end: ; preds = %while.cond ; } define i32 @nested_unbounded_loops(i32 %n) { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@nested_unbounded_loops ; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR17]] { ; CHECK-NEXT: entry: @@ -1023,7 +1078,7 @@ while.end11: ; preds = %while.cond ; } define void @non_loop_cycle(i32 %n) { -; TUNIT: Function Attrs: nofree norecurse nosync nounwind readnone +; TUNIT: Function Attrs: nofree norecurse nosync nounwind 
memory(none) ; TUNIT-LABEL: define {{[^@]+}}@non_loop_cycle ; TUNIT-SAME: (i32 [[N:%.*]]) #[[ATTR17]] { ; TUNIT-NEXT: entry: @@ -1053,9 +1108,9 @@ define void @non_loop_cycle(i32 %n) { ; TUNIT: exit: ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: nofree nosync nounwind readnone +; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@non_loop_cycle -; CGSCC-SAME: (i32 [[N:%.*]]) #[[ATTR19]] { +; CGSCC-SAME: (i32 [[N:%.*]]) #[[ATTR19:[0-9]+]] { ; CGSCC-NEXT: entry: ; CGSCC-NEXT: [[CALL:%.*]] = call i32 @fact_loop(i32 [[N]]) ; CGSCC-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 5 @@ -1143,29 +1198,29 @@ define void @willreturn_mustprogress_caller_1() mustprogress { ret void } define void @willreturn_mustprogress_caller_2() mustprogress { -; TUNIT: Function Attrs: mustprogress readonly willreturn +; TUNIT: Function Attrs: mustprogress willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@willreturn_mustprogress_caller_2 ; TUNIT-SAME: () #[[ATTR23:[0-9]+]] { -; TUNIT-NEXT: call void @readonly() #[[ATTR19:[0-9]+]] +; TUNIT-NEXT: call void @readonly() ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: mustprogress readonly willreturn +; CGSCC: Function Attrs: mustprogress willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@willreturn_mustprogress_caller_2 ; CGSCC-SAME: () #[[ATTR24:[0-9]+]] { -; CGSCC-NEXT: call void @readonly() #[[ATTR20:[0-9]+]] +; CGSCC-NEXT: call void @readonly() ; CGSCC-NEXT: ret void ; call void @readonly() ret void } define void @willreturn_mustprogress_caller_3() mustprogress { -; TUNIT: Function Attrs: mustprogress nosync readnone willreturn +; TUNIT: Function Attrs: mustprogress nosync willreturn memory(none) ; TUNIT-LABEL: define {{[^@]+}}@willreturn_mustprogress_caller_3 ; TUNIT-SAME: () #[[ATTR24:[0-9]+]] { ; TUNIT-NEXT: call void @readnone() ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: mustprogress nosync readnone willreturn +; CGSCC: Function Attrs: mustprogress nosync willreturn 
memory(none) ; CGSCC-LABEL: define {{[^@]+}}@willreturn_mustprogress_caller_3 ; CGSCC-SAME: () #[[ATTR25:[0-9]+]] { ; CGSCC-NEXT: call void @readnone() @@ -1183,16 +1238,16 @@ define void @willreturn_mustprogress_callee_1() { ret void } define void @willreturn_mustprogress_callee_2() { -; TUNIT: Function Attrs: readonly willreturn +; TUNIT: Function Attrs: willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@willreturn_mustprogress_callee_2 ; TUNIT-SAME: () #[[ATTR25:[0-9]+]] { -; TUNIT-NEXT: call void @readonly_mustprogress() #[[ATTR25]] +; TUNIT-NEXT: call void @readonly_mustprogress() #[[ATTR27]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: readonly willreturn +; CGSCC: Function Attrs: willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@willreturn_mustprogress_callee_2 ; CGSCC-SAME: () #[[ATTR26:[0-9]+]] { -; CGSCC-NEXT: call void @readonly_mustprogress() #[[ATTR26]] +; CGSCC-NEXT: call void @readonly_mustprogress() #[[ATTR28]] ; CGSCC-NEXT: ret void ; call void @readonly_mustprogress() @@ -1207,16 +1262,16 @@ define void @willreturn_mustprogress_callee_3() { ret void } define void @willreturn_mustprogress_callee_4() { -; TUNIT: Function Attrs: readonly willreturn +; TUNIT: Function Attrs: willreturn memory(read) ; TUNIT-LABEL: define {{[^@]+}}@willreturn_mustprogress_callee_4 ; TUNIT-SAME: () #[[ATTR25]] { -; TUNIT-NEXT: call void @willreturn_mustprogress_callee_2() #[[ATTR25]] +; TUNIT-NEXT: call void @willreturn_mustprogress_callee_2() #[[ATTR27]] ; TUNIT-NEXT: ret void ; -; CGSCC: Function Attrs: readonly willreturn +; CGSCC: Function Attrs: willreturn memory(read) ; CGSCC-LABEL: define {{[^@]+}}@willreturn_mustprogress_callee_4 ; CGSCC-SAME: () #[[ATTR26]] { -; CGSCC-NEXT: call void @willreturn_mustprogress_callee_2() #[[ATTR26]] +; CGSCC-NEXT: call void @willreturn_mustprogress_callee_2() #[[ATTR28]] ; CGSCC-NEXT: ret void ; call void @willreturn_mustprogress_callee_2() @@ -1226,67 +1281,64 @@ define void @willreturn_mustprogress_callee_4() { 
attributes #0 = { nounwind uwtable noinline } attributes #1 = { uwtable noinline } ;. -; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable } -; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind readnone uwtable } +; TUNIT: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; TUNIT: attributes #[[ATTR1]] = { nofree noinline nosync nounwind memory(none) uwtable } +; TUNIT: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind memory(none) uwtable } ; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nofree nosync nounwind willreturn } ; TUNIT: attributes #[[ATTR4]] = { nofree noinline nosync nounwind uwtable } ; TUNIT: attributes #[[ATTR5]] = { noreturn } ; TUNIT: attributes #[[ATTR6]] = { noinline noreturn nounwind uwtable } ; TUNIT: attributes #[[ATTR7]] = { noinline nounwind uwtable } -; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; TUNIT: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; TUNIT: attributes #[[ATTR9:[0-9]+]] = { norecurse willreturn } ; TUNIT: attributes #[[ATTR10]] = { noinline nounwind willreturn uwtable } ; TUNIT: attributes #[[ATTR11:[0-9]+]] = { noinline willreturn uwtable } ; TUNIT: attributes #[[ATTR12]] = { nounwind willreturn } -; TUNIT: attributes #[[ATTR13]] = { argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable } -; TUNIT: attributes #[[ATTR14]] = { argmemonly nofree noinline norecurse nosync nounwind readonly uwtable } -; TUNIT: attributes #[[ATTR15]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable } +; TUNIT: attributes #[[ATTR13]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } +; TUNIT: attributes 
#[[ATTR14]] = { nofree noinline norecurse nosync nounwind memory(argmem: read) uwtable } +; TUNIT: attributes #[[ATTR15]] = { nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable } ; TUNIT: attributes #[[ATTR16:[0-9]+]] = { noreturn nounwind } -; TUNIT: attributes #[[ATTR17]] = { nofree norecurse nosync nounwind readnone } -; TUNIT: attributes #[[ATTR18]] = { nofree norecurse nosync nounwind readnone willreturn } -; TUNIT: attributes #[[ATTR19]] = { readonly } -; TUNIT: attributes #[[ATTR20:[0-9]+]] = { readnone } +; TUNIT: attributes #[[ATTR17]] = { nofree norecurse nosync nounwind memory(none) } +; TUNIT: attributes #[[ATTR18]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR19:[0-9]+]] = { memory(read) } +; TUNIT: attributes #[[ATTR20:[0-9]+]] = { memory(none) } ; TUNIT: attributes #[[ATTR21]] = { mustprogress } -; TUNIT: attributes #[[ATTR22:[0-9]+]] = { mustprogress readonly } -; TUNIT: attributes #[[ATTR23]] = { mustprogress readonly willreturn } -; TUNIT: attributes #[[ATTR24]] = { mustprogress nosync readnone willreturn } -; TUNIT: attributes #[[ATTR25]] = { readonly willreturn } -; TUNIT: attributes #[[ATTR26]] = { nofree nosync nounwind readnone } -; TUNIT: attributes #[[ATTR27]] = { nofree nosync nounwind } -; TUNIT: attributes #[[ATTR28]] = { readnone willreturn } -; TUNIT: attributes #[[ATTR29]] = { nounwind } -; TUNIT: attributes #[[ATTR30]] = { willreturn } +; TUNIT: attributes #[[ATTR22:[0-9]+]] = { mustprogress memory(read) } +; TUNIT: attributes #[[ATTR23]] = { mustprogress willreturn memory(read) } +; TUNIT: attributes #[[ATTR24]] = { mustprogress nosync willreturn memory(none) } +; TUNIT: attributes #[[ATTR25]] = { willreturn memory(read) } +; TUNIT: attributes #[[ATTR26]] = { nofree nosync nounwind } +; TUNIT: attributes #[[ATTR27]] = { willreturn } +; TUNIT: attributes #[[ATTR28]] = { nounwind } ;. 
-; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } -; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable } -; CGSCC: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind readnone uwtable } +; CGSCC: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable } +; CGSCC: attributes #[[ATTR1]] = { nofree noinline nosync nounwind memory(none) uwtable } +; CGSCC: attributes #[[ATTR2]] = { nofree noinline norecurse nosync nounwind memory(none) uwtable } ; CGSCC: attributes #[[ATTR3:[0-9]+]] = { nofree nosync nounwind willreturn } ; CGSCC: attributes #[[ATTR4]] = { nofree noinline nosync nounwind uwtable } ; CGSCC: attributes #[[ATTR5]] = { noreturn } ; CGSCC: attributes #[[ATTR6]] = { noinline noreturn nounwind uwtable } ; CGSCC: attributes #[[ATTR7]] = { noinline nounwind uwtable } -; CGSCC: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CGSCC: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CGSCC: attributes #[[ATTR9:[0-9]+]] = { norecurse willreturn } ; CGSCC: attributes #[[ATTR10]] = { noinline nounwind willreturn uwtable } ; CGSCC: attributes #[[ATTR11:[0-9]+]] = { noinline willreturn uwtable } ; CGSCC: attributes #[[ATTR12]] = { nounwind willreturn } -; CGSCC: attributes #[[ATTR13]] = { argmemonly nofree noinline norecurse nosync nounwind readonly willreturn uwtable } -; CGSCC: attributes #[[ATTR14]] = { argmemonly nofree noinline norecurse nosync nounwind readonly uwtable } -; CGSCC: attributes #[[ATTR15]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable } +; CGSCC: attributes #[[ATTR13]] = { nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable } +; CGSCC: attributes #[[ATTR14]] = { nofree noinline norecurse nosync nounwind memory(argmem: read) uwtable } 
+; CGSCC: attributes #[[ATTR15]] = { nofree noinline norecurse noreturn nosync nounwind memory(none) uwtable } ; CGSCC: attributes #[[ATTR16:[0-9]+]] = { noreturn nounwind } -; CGSCC: attributes #[[ATTR17]] = { nofree norecurse nosync nounwind readnone } -; CGSCC: attributes #[[ATTR18]] = { nofree norecurse nosync nounwind readnone willreturn } -; CGSCC: attributes #[[ATTR19]] = { nofree nosync nounwind readnone } -; CGSCC: attributes #[[ATTR20]] = { readonly } -; CGSCC: attributes #[[ATTR21:[0-9]+]] = { readnone } +; CGSCC: attributes #[[ATTR17]] = { nofree norecurse nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR18]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR19]] = { nofree nosync nounwind memory(none) } +; CGSCC: attributes #[[ATTR20:[0-9]+]] = { memory(read) } +; CGSCC: attributes #[[ATTR21:[0-9]+]] = { memory(none) } ; CGSCC: attributes #[[ATTR22]] = { mustprogress } -; CGSCC: attributes #[[ATTR23:[0-9]+]] = { mustprogress readonly } -; CGSCC: attributes #[[ATTR24]] = { mustprogress readonly willreturn } -; CGSCC: attributes #[[ATTR25]] = { mustprogress nosync readnone willreturn } -; CGSCC: attributes #[[ATTR26]] = { readonly willreturn } +; CGSCC: attributes #[[ATTR23:[0-9]+]] = { mustprogress memory(read) } +; CGSCC: attributes #[[ATTR24]] = { mustprogress willreturn memory(read) } +; CGSCC: attributes #[[ATTR25]] = { mustprogress nosync willreturn memory(none) } +; CGSCC: attributes #[[ATTR26]] = { willreturn memory(read) } ; CGSCC: attributes #[[ATTR27]] = { nofree nosync nounwind } -; CGSCC: attributes #[[ATTR28]] = { readnone willreturn } +; CGSCC: attributes #[[ATTR28]] = { willreturn } ; CGSCC: attributes #[[ATTR29]] = { nounwind } -; CGSCC: attributes #[[ATTR30]] = { willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/wrapper.ll b/llvm/test/Transforms/Attributor/wrapper.ll index 85bf78a69d2ea..34af977467961 100644 --- a/llvm/test/Transforms/Attributor/wrapper.ll +++ b/llvm/test/Transforms/Attributor/wrapper.ll @@ -8,7 +8,7 @@ ; CHECK: ret ; ; Check the original function, which is wrapped and becomes anonymous -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK: define internal noundef i32 @0() ; CHECK: ret i32 1 define linkonce i32 @inner1() { @@ -35,7 +35,7 @@ entry: ; CHECK: tail call i32 @1(i32 %a, i32 %b) ; CHECK: ret ; -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) ; CHECK: define internal i32 @1(i32 %a, i32 %b) ; CHECK: %c = add i32 %a, %b ; CHECK: ret i32 %c diff --git a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll index e30830fff7c76..93d940f2de3ba 100644 --- a/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll +++ b/llvm/test/Transforms/ConstraintElimination/geps-ptrvector.ll @@ -12,3 +12,48 @@ define <2 x i1> @test.vectorgep(<2 x ptr> %vec) { %cond = icmp ule <2 x ptr> %gep, zeroinitializer ret <2 x i1> %cond } + +define <2 x i1> @test.vectorgep.ult.true(<2 x ptr> %vec) { +; CHECK-LABEL: @test.vectorgep.ult.true( +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, <2 x ptr> [[VEC:%.*]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ult <2 x ptr> [[VEC]], [[GEP_1]] +; CHECK-NEXT: ret <2 x i1> +; + %gep.1 = getelementptr inbounds i32, <2 x ptr> %vec, i64 1 + %t.1 = icmp ult <2 x ptr> %vec, %gep.1 + ret <2 x i1> %t.1 +} + +define <2 x i1> @test.vectorgep.ult.false(<2 x ptr> %vec) { +; CHECK-LABEL: @test.vectorgep.ult.false( +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, <2 x ptr> [[VEC:%.*]], i64 1 
+; CHECK-NEXT: [[T_1:%.*]] = icmp ult <2 x ptr> [[GEP_1]], [[VEC]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer +; + %gep.1 = getelementptr inbounds i32, <2 x ptr> %vec, i64 1 + %t.1 = icmp ult <2 x ptr> %gep.1, %vec + ret <2 x i1> %t.1 +} + + +define @test.scalable.vectorgep.ult.true( %vec) { +; CHECK-LABEL: @test.scalable.vectorgep.ult.true( +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, [[VEC:%.*]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ult [[VEC]], [[GEP_1]] +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer) +; + %gep.1 = getelementptr inbounds i32, %vec, i64 1 + %t.1 = icmp ult %vec, %gep.1 + ret %t.1 +} + +define @test.scalable.vectorgep.ult.false( %vec) { +; CHECK-LABEL: @test.scalable.vectorgep.ult.false( +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, [[VEC:%.*]], i64 1 +; CHECK-NEXT: [[T_1:%.*]] = icmp ult [[GEP_1]], [[VEC]] +; CHECK-NEXT: ret zeroinitializer +; + %gep.1 = getelementptr inbounds i32, %vec, i64 1 + %t.1 = icmp ult %gep.1, %vec + ret %t.1 +} diff --git a/llvm/test/Transforms/Coroutines/coro-debug.ll b/llvm/test/Transforms/Coroutines/coro-debug.ll index 396cf5472d64e..abb9edc33de8b 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug.ll @@ -189,7 +189,7 @@ attributes #7 = { noduplicate } ; CHECK: %[[ALLOCATED_STORAGE:.+]] = invoke i8* @allocate() ; CHECK-NEXT: to label %[[NORMAL_DEST:.+]] unwind ; CHECK: [[NORMAL_DEST]] -; CHEKC-NEXT: call void @llvm.dbg.declare(metadata i8* %[[ALLOCATED_STORAGE]] +; CHECK-NEXT: call void @llvm.dbg.declare(metadata i8* %[[ALLOCATED_STORAGE]] ; CHECK: %[[CALLBR_RES:.+]] = callbr i32 asm ; CHECK-NEXT: to label %[[DEFAULT_DEST:.+]] [label ; CHECK: [[DEFAULT_DEST]]: diff --git a/llvm/test/Transforms/Coroutines/coro-readnone-02.ll b/llvm/test/Transforms/Coroutines/coro-readnone-02.ll index eede209fbdd0f..c96377fd1c6d8 100644 --- a/llvm/test/Transforms/Coroutines/coro-readnone-02.ll +++ 
b/llvm/test/Transforms/Coroutines/coro-readnone-02.ll @@ -50,7 +50,7 @@ suspend: ; CHECK_SPLITTED-NEXT: call void @nop() ; CHECK_SPLITTED-NEXT: call void @print_same() ; -; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { readnone } +; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { memory(none) } ; ; CHECK_UNSPLITTED-LABEL: @f( ; CHECK_UNSPLITTED: br i1 %cmp, label %same, label %diff diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll index 4dc9eb34f86ce..506aac79358f6 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll @@ -16,10 +16,10 @@ define void @test1(i64 %tmp35) { ; CHECK-NEXT: [[TMP36:%.*]] = icmp sgt i64 [[TMP35:%.*]], 0 ; CHECK-NEXT: br i1 [[TMP36]], label [[BB_TRUE:%.*]], label [[BB_FALSE:%.*]] ; CHECK: bb_true: -; CHECK-NEXT: tail call void @check1(i1 false) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: tail call void @check1(i1 false) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: unreachable ; CHECK: bb_false: -; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR1]] +; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR2]] ; CHECK-NEXT: unreachable ; bb: @@ -55,7 +55,7 @@ define void @test2(i64 %tmp35, i1 %inner_cmp) { ; CHECK-NEXT: tail call void @check1(i1 false) ; CHECK-NEXT: unreachable ; CHECK: bb_false: -; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR1]] +; CHECK-NEXT: tail call void @check2(i1 true) #[[ATTR2]] ; CHECK-NEXT: unreachable ; bb: @@ -1172,4 +1172,77 @@ if.false: ret void } +define void @non_const_range(i32 %a, i32 %b) { +; CHECK-LABEL: @non_const_range( +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[A:%.*]], 11 +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[B:%.*]], 21 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP1]], i1 [[CMP2]], i1 false +; CHECK-NEXT: br i1 [[AND]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[A_100:%.*]] = add nuw nsw i32 [[A]], 100 +; CHECK-NEXT: call void @check1(i1 
true) +; CHECK-NEXT: call void @check1(i1 false) +; CHECK-NEXT: [[A_10:%.*]] = add nuw nsw i32 [[A]], 10 +; CHECK-NEXT: [[CMP5:%.*]] = icmp ne i32 [[A_10]], [[B]] +; CHECK-NEXT: call void @check1(i1 [[CMP5]]) +; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i32 [[A_10]], [[B]] +; CHECK-NEXT: call void @check1(i1 [[CMP6]]) +; CHECK-NEXT: ret void +; CHECK: else: +; CHECK-NEXT: ret void +; + %cmp1 = icmp ult i32 %a, 11 + %cmp2 = icmp ult i32 %b, 21 + %and = select i1 %cmp1, i1 %cmp2, i1 false + br i1 %and, label %if, label %else + +if: + %a.100 = add nuw nsw i32 %a, 100 + %cmp3 = icmp ne i32 %a.100, %b + call void @check1(i1 %cmp3) + %cmp4 = icmp eq i32 %a.100, %b + call void @check1(i1 %cmp4) + + %a.10 = add nuw nsw i32 %a, 10 + %cmp5 = icmp ne i32 %a.10, %b + call void @check1(i1 %cmp5) + %cmp6 = icmp eq i32 %a.10, %b + call void @check1(i1 %cmp6) + ret void + +else: + ret void +} + +define i1 @non_const_range_minmax(i8 %a, i8 %b) { +; CHECK-LABEL: @non_const_range_minmax( +; CHECK-NEXT: [[A2:%.*]] = call i8 @llvm.umin.i8(i8 [[A:%.*]], i8 10) +; CHECK-NEXT: [[B2:%.*]] = call i8 @llvm.umax.i8(i8 [[B:%.*]], i8 11) +; CHECK-NEXT: ret i1 true +; + %a2 = call i8 @llvm.umin.i8(i8 %a, i8 10) + %b2 = call i8 @llvm.umax.i8(i8 %b, i8 11) + %cmp1 = icmp ult i8 %a2, %b2 + ret i1 %cmp1 +} + +; FIXME: Also support vectors. 
+define <2 x i1> @non_const_range_minmax_vec(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @non_const_range_minmax_vec( +; CHECK-NEXT: [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> ) +; CHECK-NEXT: [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> ) +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[A2]], [[B2]] +; CHECK-NEXT: ret <2 x i1> [[CMP1]] +; + %a2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %a, <2 x i8> ) + %b2 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %b, <2 x i8> ) + %cmp1 = icmp ult <2 x i8> %a2, %b2 + ret <2 x i1> %cmp1 +} + +declare i8 @llvm.umin.i8(i8, i8) +declare i8 @llvm.umax.i8(i8, i8) +declare <2 x i8> @llvm.umin.v2i8(<2 x i8>, <2 x i8>) +declare <2 x i8> @llvm.umax.v2i8(<2 x i8>, <2 x i8>) + attributes #4 = { noreturn } diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll b/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll index 3ac26f4507099..c69d259984bd0 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/mul.ll @@ -209,8 +209,7 @@ define i1 @nsw_range1(i8 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add nuw nsw i8 [[B:%.*]], -3 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i8 [[C]], 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[C]], [[MUL]] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; entry: %c = add nuw nsw i8 %b, -3 diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/shl.ll b/llvm/test/Transforms/CorrelatedValuePropagation/shl.ll index 98113cbdae36a..88311219dee58 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/shl.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/shl.ll @@ -412,8 +412,7 @@ define i1 @nsw_range1(i8 %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add nuw nsw i8 [[B:%.*]], -3 ; CHECK-NEXT: [[SHL:%.*]] = shl nsw i8 [[C]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[C]], [[SHL]] -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 false ; entry: %c = 
add nuw nsw i8 %b, -3 diff --git a/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll index 047545a8115a1..9ff103cdda8f9 100644 --- a/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll +++ b/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll @@ -45,7 +45,7 @@ bb2: ; preds = %bb1, %bb declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { nounwind ssp } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #2 = { noinline nounwind ssp } ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll index 13d7682fcf877..23d6c5ae1fdb6 100644 --- a/llvm/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll +++ b/llvm/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=function-attrs -S | FileCheck %s define i32 @a() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@a ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP:%.*]] = call i32 @b() @@ -13,7 +13,7 @@ define i32 @a() { } define i32 @b() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@b ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: [[TMP:%.*]] = call i32 @a() diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll index 75fb113f11436..ee8437e8c0f1a 100644 --- a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll +++ b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll @@ -6,7 +6,7 @@ declare i32 @e() 
readnone define i32 @f() { -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: @f( ; CHECK-NEXT: [[TMP:%.*]] = call i32 @e() ; CHECK-NEXT: ret i32 [[TMP]] @@ -16,7 +16,7 @@ define i32 @f() { } define i32 @g() readonly { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @g( ; CHECK-NEXT: ret i32 0 ; @@ -24,7 +24,7 @@ define i32 @g() readonly { } define i32 @h() readnone { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @h( ; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr @x, align 4 ; CHECK-NEXT: ret i32 [[TMP]] diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll index 50c51f0c74a9d..e263efc4f42f6 100644 --- a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll +++ b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=function-attrs -S | FileCheck %s define i32 @f() { -; CHECK: Function Attrs: nofree readonly +; CHECK: Function Attrs: nofree memory(read) ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = call i32 @e() diff --git a/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll b/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll index 178b075f1ec78..7a97498b8f328 100644 --- a/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll +++ b/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll @@ -4,7 +4,7 @@ @s = external constant i8 define i8 @f() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; 
CHECK-LABEL: @f( ; CHECK-NEXT: [[TMP:%.*]] = load i8, ptr @s, align 1 ; CHECK-NEXT: ret i8 [[TMP]] diff --git a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll index 5ea9582dfe10e..fb73dd770fa9d 100644 --- a/llvm/test/Transforms/FunctionAttrs/argmemonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/argmemonly.ll @@ -4,7 +4,7 @@ @g = global i32 20 define void @test_no_read_or_write() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @test_no_read_or_write( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret void @@ -14,7 +14,7 @@ entry: } define i32 @test_only_read_arg(ptr %ptr) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @test_only_read_arg( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR:%.*]], align 4 @@ -26,7 +26,7 @@ entry: } define i32 @test_only_read_arg_already_has_argmemonly(ptr %ptr) argmemonly { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @test_only_read_arg_already_has_argmemonly( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR:%.*]], align 4 @@ -38,7 +38,7 @@ entry: } define i32 @test_read_global() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read, argmem: none, inaccessiblemem: none) ; CHECK-LABEL: @test_read_global( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = load i32, ptr @g, align 4 @@ -50,7 +50,7 @@ entry: } define i32 
@test_read_loaded_ptr(ptr %ptr) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read, inaccessiblemem: none) ; CHECK-LABEL: @test_read_loaded_ptr( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[PTR:%.*]], align 8 @@ -64,7 +64,7 @@ entry: } define void @test_only_write_arg(ptr %ptr) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: @test_only_write_arg( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 @@ -76,7 +76,7 @@ entry: } define void @test_write_global() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) ; CHECK-LABEL: @test_write_global( ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 0, ptr @g, align 4 @@ -103,7 +103,7 @@ entry: declare i32 @fn_readnone() readnone define void @test_call_readnone(ptr %ptr) { -; CHECK: Function Attrs: argmemonly writeonly +; CHECK: Function Attrs: memory(argmem: write) ; CHECK-LABEL: @test_call_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @fn_readnone() @@ -119,7 +119,7 @@ entry: declare i32 @fn_argmemonly(ptr) argmemonly define i32 @test_call_argmemonly(ptr %ptr) { -; CHECK: Function Attrs: argmemonly +; CHECK: Function Attrs: memory(argmem: readwrite) ; CHECK-LABEL: @test_call_argmemonly( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @fn_argmemonly(ptr [[PTR:%.*]]) @@ -131,7 +131,7 @@ entry: } define i32 @test_call_fn_where_argmemonly_can_be_inferred(ptr %ptr) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: 
Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @test_call_fn_where_argmemonly_can_be_inferred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @test_only_read_arg(ptr [[PTR:%.*]]) @@ -143,7 +143,7 @@ entry: } define void @test_memcpy_argonly(ptr %dst, ptr %src) { -; CHECK: Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @test_memcpy_argonly( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 32, i1 false) @@ -159,7 +159,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) @arr = global [32 x i8] zeroinitializer define void @test_memcpy_src_global(ptr %dst) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) ; CHECK-LABEL: @test_memcpy_src_global( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr @arr, i64 32, i1 false) @@ -171,7 +171,7 @@ entry: } define void @test_memcpy_dst_global(ptr %src) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(readwrite, inaccessiblemem: none) ; CHECK-LABEL: @test_memcpy_dst_global( ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr @arr, ptr [[SRC:%.*]], i64 32, i1 false) @@ -183,7 +183,7 @@ entry: } define i32 @test_read_arg_access_alloca(ptr %ptr) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @test_read_arg_access_alloca( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 @@ 
-203,7 +203,7 @@ entry: declare void @fn_inaccessiblememonly() inaccessiblememonly define void @test_inaccessiblememonly() { -; CHECK: Function Attrs: inaccessiblememonly +; CHECK: Function Attrs: memory(inaccessiblemem: readwrite) ; CHECK-LABEL: @test_inaccessiblememonly( ; CHECK-NEXT: call void @fn_inaccessiblememonly() ; CHECK-NEXT: ret void @@ -213,9 +213,9 @@ define void @test_inaccessiblememonly() { } define void @test_inaccessiblememonly_readonly() { -; CHECK: Function Attrs: inaccessiblememonly nofree readonly +; CHECK: Function Attrs: nofree memory(inaccessiblemem: read) ; CHECK-LABEL: @test_inaccessiblememonly_readonly( -; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR15:[0-9]+]] +; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR16:[0-9]+]] ; CHECK-NEXT: ret void ; call void @fn_inaccessiblememonly() readonly @@ -223,10 +223,10 @@ define void @test_inaccessiblememonly_readonly() { } define void @test_inaccessibleorargmemonly_readonly(ptr %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree readonly +; CHECK: Function Attrs: nofree memory(argmem: read, inaccessiblemem: read) ; CHECK-LABEL: @test_inaccessibleorargmemonly_readonly( ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG:%.*]], align 4 -; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR15]] +; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR16]] ; CHECK-NEXT: ret void ; load i32, ptr %arg @@ -235,10 +235,10 @@ define void @test_inaccessibleorargmemonly_readonly(ptr %arg) { } define void @test_inaccessibleorargmemonly_readwrite(ptr %arg) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK: Function Attrs: memory(argmem: write, inaccessiblemem: read) ; CHECK-LABEL: @test_inaccessibleorargmemonly_readwrite( ; CHECK-NEXT: store i32 0, ptr [[ARG:%.*]], align 4 -; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR15]] +; CHECK-NEXT: call void @fn_inaccessiblememonly() #[[ATTR16]] ; CHECK-NEXT: ret void ; store i32 0, ptr %arg diff --git 
a/llvm/test/Transforms/FunctionAttrs/atomic.ll b/llvm/test/Transforms/FunctionAttrs/atomic.ll index 33711acd7d540..8635f2bbdc498 100644 --- a/llvm/test/Transforms/FunctionAttrs/atomic.ll +++ b/llvm/test/Transforms/FunctionAttrs/atomic.ll @@ -4,7 +4,7 @@ ; Atomic load/store to local doesn't affect whether a function is ; readnone/readonly. define i32 @test1(i32 %x) uwtable ssp { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone ssp willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind ssp willreturn memory(none) uwtable ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 @@ -21,7 +21,7 @@ entry: ; A function with an Acquire load is not readonly. define i32 @test2(ptr %x) uwtable ssp { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind ssp willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind ssp willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[X:%.*]] seq_cst, align 4 diff --git a/llvm/test/Transforms/FunctionAttrs/convergent.ll b/llvm/test/Transforms/FunctionAttrs/convergent.ll index 34598628688fc..0263e0ec22551 100644 --- a/llvm/test/Transforms/FunctionAttrs/convergent.ll +++ b/llvm/test/Transforms/FunctionAttrs/convergent.ll @@ -2,7 +2,7 @@ ; RUN: opt -passes=function-attrs -S < %s | FileCheck %s define i32 @nonleaf() convergent { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nonleaf ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @leaf() @@ -13,7 +13,7 @@ define i32 @nonleaf() convergent { } define i32 @leaf() convergent { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; 
CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@leaf ; CHECK-SAME: () #[[ATTR0]] { ; CHECK-NEXT: ret i32 0 @@ -85,7 +85,7 @@ define i32 @intrinsic() convergent { } define i32 @recursive1() convergent { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@recursive1 ; CHECK-SAME: () #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @recursive2() #[[ATTR1]] @@ -96,7 +96,7 @@ define i32 @recursive1() convergent { } define i32 @recursive2() convergent { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@recursive2 ; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @recursive1() #[[ATTR1]] diff --git a/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll b/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll index b2b46f6f4974c..7e246c482431e 100644 --- a/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/incompatible_fn_attrs.ll @@ -5,7 +5,7 @@ ; function attributes when we derive readnone. 
define ptr @given_argmem_infer_readnone(ptr %p) #0 { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @given_argmem_infer_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret ptr [[P:%.*]] @@ -15,7 +15,7 @@ entry: } define ptr @given_inaccessible_infer_readnone(ptr %p) #1 { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @given_inaccessible_infer_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret ptr [[P:%.*]] @@ -25,7 +25,7 @@ entry: } define ptr @given_inaccessible_or_argmem_infer_readnone(ptr %p) #2 { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @given_inaccessible_or_argmem_infer_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret ptr [[P:%.*]] diff --git a/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll b/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll index 98a36ac17e19a..9ba82e2dc1cce 100644 --- a/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll +++ b/llvm/test/Transforms/FunctionAttrs/int_sideeffect.ll @@ -7,7 +7,7 @@ declare void @llvm.sideeffect() ; is present. 
define void @test() { -; CHECK: Function Attrs: inaccessiblememonly mustprogress nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-LABEL: @test( ; CHECK-NEXT: call void @llvm.sideeffect() ; CHECK-NEXT: ret void @@ -17,7 +17,7 @@ define void @test() { } define void @loop() { -; CHECK: Function Attrs: inaccessiblememonly nofree noreturn nosync nounwind +; CHECK: Function Attrs: nofree noreturn nosync nounwind memory(inaccessiblemem: readwrite) ; CHECK-LABEL: @loop( ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: diff --git a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll index 81064c2f34889..0fe0eadf5f669 100644 --- a/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll +++ b/llvm/test/Transforms/FunctionAttrs/nofree-attributor.ll @@ -14,7 +14,7 @@ declare void @_ZdaPv(ptr) local_unnamed_addr #2 ; TEST 1 (positive case) define void @only_return() #0 { -; FNATTR: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; FNATTR: Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@only_return ; FNATTR-SAME: () #[[ATTR3:[0-9]+]] { ; FNATTR-NEXT: ret void @@ -101,7 +101,7 @@ end: define void @mutual_recursion1() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@mutual_recursion1 ; FNATTR-SAME: () #[[ATTR4:[0-9]+]] { ; FNATTR-NEXT: call void @mutual_recursion2() @@ -112,7 +112,7 @@ define void @mutual_recursion1() #0 { } define void @mutual_recursion2() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define 
{{[^@]+}}@mutual_recursion2 ; FNATTR-SAME: () #[[ATTR4]] { ; FNATTR-NEXT: call void @mutual_recursion1() @@ -174,7 +174,7 @@ define noalias ptr @call_realloc(ptr nocapture %0, i64 %1) local_unnamed_addr #0 declare void @nofree_function() nofree readnone #0 define void @call_nofree_function() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@call_nofree_function ; FNATTR-SAME: () #[[ATTR4]] { ; FNATTR-NEXT: tail call void @nofree_function() @@ -225,7 +225,7 @@ define void @call_both() #0 { declare float @llvm.floor.f32(float) define void @call_floor(float %a) #0 { -; FNATTR: Function Attrs: mustprogress nofree noinline nosync nounwind readnone willreturn uwtable +; FNATTR: Function Attrs: mustprogress nofree noinline nosync nounwind willreturn memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@call_floor ; FNATTR-SAME: (float [[A:%.*]]) #[[ATTR7:[0-9]+]] { ; FNATTR-NEXT: [[TMP1:%.*]] = tail call float @llvm.floor.f32(float [[A]]) @@ -239,7 +239,7 @@ define void @call_floor(float %a) #0 { ; Check propagation. 
define void @f1() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@f1 ; FNATTR-SAME: () #[[ATTR4]] { ; FNATTR-NEXT: tail call void @nofree_function() @@ -250,7 +250,7 @@ define void @f1() #0 { } define void @f2() #0 { -; FNATTR: Function Attrs: nofree noinline nosync nounwind readnone uwtable +; FNATTR: Function Attrs: nofree noinline nosync nounwind memory(none) uwtable ; FNATTR-LABEL: define {{[^@]+}}@f2 ; FNATTR-SAME: () #[[ATTR4]] { ; FNATTR-NEXT: tail call void @f1() diff --git a/llvm/test/Transforms/FunctionAttrs/nofree.ll b/llvm/test/Transforms/FunctionAttrs/nofree.ll index 020d6d23af0a3..ca56117eeacc8 100644 --- a/llvm/test/Transforms/FunctionAttrs/nofree.ll +++ b/llvm/test/Transforms/FunctionAttrs/nofree.ll @@ -34,7 +34,7 @@ entry: declare void @free(ptr nocapture) local_unnamed_addr #2 define i32 @_Z4foo3Pi(ptr nocapture readonly %a) local_unnamed_addr #3 { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CHECK-LABEL: @_Z4foo3Pi( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4 @@ -81,8 +81,8 @@ define noalias ptr @_Z4foo6Pm(ptr nocapture %a) local_unnamed_addr #1 { ; CHECK: Function Attrs: nounwind uwtable ; CHECK-LABEL: @_Z4foo6Pm( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8 -; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @realloc(ptr [[A]], i64 [[TMP1]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @realloc(ptr [[A]], i64 [[TMP0]]) #[[ATTR2]] ; CHECK-NEXT: ret ptr [[CALL]] ; entry: diff --git a/llvm/test/Transforms/FunctionAttrs/norecurse.ll b/llvm/test/Transforms/FunctionAttrs/norecurse.ll index 
fe262a847537c..4340956312946 100644 --- a/llvm/test/Transforms/FunctionAttrs/norecurse.ll +++ b/llvm/test/Transforms/FunctionAttrs/norecurse.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs),rpo-function-attrs' -S | FileCheck %s define i32 @leaf() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@leaf ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 @@ -11,7 +11,7 @@ define i32 @leaf() { } define i32 @self_rec() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@self_rec ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @self_rec() @@ -22,7 +22,7 @@ define i32 @self_rec() { } define i32 @indirect_rec() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@indirect_rec ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @indirect_rec2() @@ -33,7 +33,7 @@ define i32 @indirect_rec() { } define i32 @indirect_rec2() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@indirect_rec2 ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @indirect_rec() @@ -44,7 +44,7 @@ define i32 @indirect_rec2() { } define i32 @extern() { -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@extern ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -57,7 +57,7 @@ define i32 @extern() { declare i32 @k() readnone define void @intrinsic(ptr %dest, ptr %src, i32 %len) { -; CHECK: Function Attrs: argmemonly mustprogress 
nofree nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@intrinsic ; CHECK-SAME: (ptr nocapture writeonly [[DEST:%.*]], ptr nocapture readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DEST]], ptr [[SRC]], i32 [[LEN]], i1 false) @@ -70,7 +70,7 @@ define void @intrinsic(ptr %dest, ptr %src, i32 %len) { declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) define internal i32 @called_by_norecurse() { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@called_by_norecurse ; CHECK-SAME: () #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -81,7 +81,7 @@ define internal i32 @called_by_norecurse() { } define void @m() norecurse { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@m ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @called_by_norecurse() @@ -92,7 +92,7 @@ define void @m() norecurse { } define internal i32 @called_by_norecurse_indirectly() { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -103,7 +103,7 @@ define internal i32 @called_by_norecurse_indirectly() { } define internal void @o() { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@o ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @called_by_norecurse_indirectly() @@ -114,7 +114,7 @@ define internal void @o() { } define void @p() norecurse { -; CHECK: Function Attrs: nofree 
norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@p ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: call void @o() @@ -125,7 +125,7 @@ define void @p() norecurse { } define internal i32 @escapes_as_parameter(ptr %p) { -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@escapes_as_parameter ; CHECK-SAME: (ptr nocapture readnone [[P:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @k() @@ -136,7 +136,7 @@ define internal i32 @escapes_as_parameter(ptr %p) { } define internal void @q() { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@q ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: [[A:%.*]] = call i32 @escapes_as_parameter(ptr @escapes_as_parameter) @@ -147,7 +147,7 @@ define internal void @q() { } define void @r() norecurse { -; CHECK: Function Attrs: nofree norecurse nosync readnone +; CHECK: Function Attrs: nofree norecurse nosync memory(none) ; CHECK-LABEL: define {{[^@]+}}@r ; CHECK-SAME: () #[[ATTR6]] { ; CHECK-NEXT: call void @q() diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll index 77f208e6b0302..1017248e3f64f 100644 --- a/llvm/test/Transforms/FunctionAttrs/nosync.ll +++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll @@ -6,7 +6,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; Base case, empty function define void @test1() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @test1( ; CHECK-NEXT: ret void ; @@ -15,7 +15,7 @@ define void @test1() { ; Show the bottom up walk define void @test2() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone 
willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @test2( ; CHECK-NEXT: call void @test1() ; CHECK-NEXT: ret void @@ -38,7 +38,7 @@ define void @test3() convergent { } define i32 @test4(i32 %a, i32 %b) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: @test4( ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret i32 [[A]] @@ -49,7 +49,7 @@ define i32 @test4(i32 %a, i32 %b) { ; negative case - explicit sync define void @test5(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @test5( ; CHECK-NEXT: store atomic i8 0, ptr [[P:%.*]] seq_cst, align 1 ; CHECK-NEXT: ret void @@ -60,7 +60,7 @@ define void @test5(ptr %p) { ; negative case - explicit sync define i8 @test6(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @test6( ; CHECK-NEXT: [[V:%.*]] = load atomic i8, ptr [[P:%.*]] seq_cst, align 1 ; CHECK-NEXT: ret i8 [[V]] @@ -71,7 +71,7 @@ define i8 @test6(ptr %p) { ; negative case - explicit sync define void @test7(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @test7( ; CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P:%.*]], i8 0 seq_cst, align 1 ; CHECK-NEXT: ret void @@ -104,7 +104,7 @@ define void @test9(ptr %p) { ; atomic load with monotonic ordering define i32 @load_monotonic(ptr nocapture readonly %0) norecurse nounwind uwtable { -; 
CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: @load_monotonic( ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0:%.*]] monotonic, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -115,7 +115,7 @@ define i32 @load_monotonic(ptr nocapture readonly %0) norecurse nounwind uwtable ; atomic store with monotonic ordering. define void @store_monotonic(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: @store_monotonic( ; CHECK-NEXT: store atomic i32 10, ptr [[TMP0:%.*]] monotonic, align 4 ; CHECK-NEXT: ret void @@ -127,7 +127,7 @@ define void @store_monotonic(ptr nocapture %0) norecurse nounwind uwtable { ; negative, should not deduce nosync ; atomic load with acquire ordering. 
define i32 @load_acquire(ptr nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) uwtable ; CHECK-LABEL: @load_acquire( ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0:%.*]] acquire, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -137,7 +137,7 @@ define i32 @load_acquire(ptr nocapture readonly %0) norecurse nounwind uwtable { } define i32 @load_unordered(ptr nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) uwtable ; CHECK-LABEL: @load_unordered( ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr [[TMP0:%.*]] unordered, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -148,7 +148,7 @@ define i32 @load_unordered(ptr nocapture readonly %0) norecurse nounwind uwtable ; atomic store with unordered ordering. 
define void @store_unordered(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) uwtable ; CHECK-LABEL: @store_unordered( ; CHECK-NEXT: store atomic i32 10, ptr [[TMP0:%.*]] unordered, align 4 ; CHECK-NEXT: ret void @@ -161,7 +161,7 @@ define void @store_unordered(ptr nocapture %0) norecurse nounwind uwtable { ; negative, should not deduce nosync ; atomic load with release ordering define void @load_release(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree norecurse nounwind uwtable +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable ; CHECK-LABEL: @load_release( ; CHECK-NEXT: store atomic volatile i32 10, ptr [[TMP0:%.*]] release, align 4 ; CHECK-NEXT: ret void @@ -172,7 +172,7 @@ define void @load_release(ptr nocapture %0) norecurse nounwind uwtable { ; negative volatile, relaxed atomic define void @load_volatile_release(ptr nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree norecurse nounwind uwtable +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable ; CHECK-LABEL: @load_volatile_release( ; CHECK-NEXT: store atomic volatile i32 10, ptr [[TMP0:%.*]] release, align 4 ; CHECK-NEXT: ret void @@ -183,7 +183,7 @@ define void @load_volatile_release(ptr nocapture %0) norecurse nounwind uwtable ; volatile store. 
define void @volatile_store(ptr %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree norecurse nounwind uwtable +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable ; CHECK-LABEL: @volatile_store( ; CHECK-NEXT: store volatile i32 14, ptr [[TMP0:%.*]], align 4 ; CHECK-NEXT: ret void @@ -195,7 +195,7 @@ define void @volatile_store(ptr %0) norecurse nounwind uwtable { ; negative, should not deduce nosync ; volatile load. define i32 @volatile_load(ptr %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly mustprogress nofree norecurse nounwind willreturn uwtable +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) uwtable ; CHECK-LABEL: @volatile_load( ; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP0:%.*]], align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -237,7 +237,7 @@ declare void @llvm.memset(ptr %dest, i8 %val, i32 %len, i1 %isvolatile) ; negative, checking volatile intrinsics. define i32 @memcpy_volatile(ptr %ptr1, ptr %ptr2) { -; CHECK: Function Attrs: argmemonly mustprogress nofree nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: @memcpy_volatile( ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], i32 8, i1 true) ; CHECK-NEXT: ret i32 4 @@ -248,7 +248,7 @@ define i32 @memcpy_volatile(ptr %ptr1, ptr %ptr2) { ; positive, non-volatile intrinsic. 
define i32 @memset_non_volatile(ptr %ptr1, i8 %val) { -; CHECK: Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: @memset_non_volatile( ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr [[PTR1:%.*]], i8 [[VAL:%.*]], i32 8, i1 false) ; CHECK-NEXT: ret i32 4 @@ -271,7 +271,7 @@ declare void @readnone_test() convergent readnone ; negative. Convergent define void @convergent_readnone(){ -; CHECK: Function Attrs: nofree nosync readnone +; CHECK: Function Attrs: nofree nosync memory(none) ; CHECK-LABEL: @convergent_readnone( ; CHECK-NEXT: call void @readnone_test() ; CHECK-NEXT: ret void @@ -299,7 +299,7 @@ define void @i_totally_sync() { declare float @llvm.cos(float %val) readnone define float @cos_test(float %x) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) ; CHECK-LABEL: @cos_test( ; CHECK-NEXT: [[C:%.*]] = call float @llvm.cos.f32(float [[X:%.*]]) ; CHECK-NEXT: ret float [[C]] diff --git a/llvm/test/Transforms/FunctionAttrs/nounwind.ll b/llvm/test/Transforms/FunctionAttrs/nounwind.ll index 7987f7477b900..a147685964dfe 100644 --- a/llvm/test/Transforms/FunctionAttrs/nounwind.ll +++ b/llvm/test/Transforms/FunctionAttrs/nounwind.ll @@ -3,7 +3,7 @@ ; TEST 1 define i32 @foo1() { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@foo1 ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: ret i32 1 @@ -13,7 +13,7 @@ define i32 @foo1() { ; TEST 2 define i32 @scc1_foo() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@scc1_foo ; CHECK-SAME: () 
#[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @scc1_bar() @@ -26,7 +26,7 @@ define i32 @scc1_foo() { ; TEST 3 define i32 @scc1_bar() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@scc1_bar ; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @scc1_foo() diff --git a/llvm/test/Transforms/FunctionAttrs/optnone.ll b/llvm/test/Transforms/FunctionAttrs/optnone.ll index 260d53b2bfed9..4f097147ff57f 100644 --- a/llvm/test/Transforms/FunctionAttrs/optnone.ll +++ b/llvm/test/Transforms/FunctionAttrs/optnone.ll @@ -20,6 +20,6 @@ declare i8 @strlen(ptr) noinline optnone ; CHECK: (ptr) #1 ; CHECK-LABEL: attributes #0 -; CHECK: = { mustprogress nofree norecurse nosync nounwind readnone willreturn } +; CHECK: = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } ; CHECK-LABEL: attributes #1 ; CHECK: = { noinline optnone } diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll index 1833d8b561ccf..94ffde15d338a 100644 --- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll @@ -18,7 +18,7 @@ define void @test1_2(ptr %x1_2, ptr %y1_2, ptr %z1_2) { } define ptr @test2(ptr %p) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) ; CHECK-LABEL: define {{[^@]+}}@test2 ; CHECK-SAME: (ptr readnone returned [[P:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: store i32 0, ptr @x, align 4 @@ -29,7 +29,7 @@ define ptr @test2(ptr %p) { } define i1 @test3(ptr %p, ptr %q) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; 
CHECK-LABEL: define {{[^@]+}}@test3 ; CHECK-SAME: (ptr readnone [[P:%.*]], ptr readnone [[Q:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[A:%.*]] = icmp ult ptr [[P]], [[Q]] @@ -42,7 +42,7 @@ define i1 @test3(ptr %p, ptr %q) { declare void @test4_1(ptr nocapture) readonly define void @test4_2(ptr %p) { -; CHECK: Function Attrs: nofree readonly +; CHECK: Function Attrs: nofree memory(read) ; CHECK-LABEL: define {{[^@]+}}@test4_2 ; CHECK-SAME: (ptr nocapture readonly [[P:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: call void @test4_1(ptr [[P]]) @@ -54,7 +54,7 @@ define void @test4_2(ptr %p) { ; Missed optz'n: we could make %q readnone, but don't break test6! define void @test5(ptr %p, ptr %q) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test5 ; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: store ptr [[Q]], ptr [[P]], align 8 @@ -81,7 +81,7 @@ define void @test6_2(ptr %p, ptr %q) { ; inalloca parameters are always considered written define void @test7_1(ptr inalloca(i32) %a) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test7_1 ; CHECK-SAME: (ptr nocapture inalloca(i32) [[A:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: ret void @@ -91,7 +91,7 @@ define void @test7_1(ptr inalloca(i32) %a) { ; preallocated parameters are always considered written define void @test7_2(ptr preallocated(i32) %a) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test7_2 ; CHECK-SAME: 
(ptr nocapture preallocated(i32) [[A:%.*]]) #[[ATTR5]] { ; CHECK-NEXT: ret void @@ -100,7 +100,7 @@ define void @test7_2(ptr preallocated(i32) %a) { } define ptr @test8_1(ptr %p) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@test8_1 ; CHECK-SAME: (ptr readnone returned [[P:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: entry: @@ -111,7 +111,7 @@ entry: } define void @test8_2(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test8_2 ; CHECK-SAME: (ptr writeonly [[P:%.*]]) #[[ATTR4]] { ; CHECK-NEXT: entry: @@ -128,7 +128,7 @@ entry: declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>%val, <4 x ptr>, i32, <4 x i1>) define void @test9(<4 x ptr> %ptrs, <4 x i32>%val) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(write) ; CHECK-LABEL: define {{[^@]+}}@test9 ; CHECK-SAME: (<4 x ptr> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VAL]], <4 x ptr> [[PTRS]], i32 4, <4 x i1> ) @@ -140,7 +140,7 @@ define void @test9(<4 x ptr> %ptrs, <4 x i32>%val) { declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) define <4 x i32> @test10(<4 x ptr> %ptrs) { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(read) ; CHECK-LABEL: define {{[^@]+}}@test10 ; CHECK-SAME: (<4 x ptr> [[PTRS:%.*]]) #[[ATTR9:[0-9]+]] { ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> 
[[PTRS]], i32 4, <4 x i1> , <4 x i32> undef) @@ -152,7 +152,7 @@ define <4 x i32> @test10(<4 x ptr> %ptrs) { declare <4 x i32> @test11_1(<4 x ptr>) argmemonly nounwind readonly define <4 x i32> @test11_2(<4 x ptr> %ptrs) { -; CHECK: Function Attrs: argmemonly nofree nounwind readonly +; CHECK: Function Attrs: nofree nounwind memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@test11_2 ; CHECK-SAME: (<4 x ptr> [[PTRS:%.*]]) #[[ATTR11:[0-9]+]] { ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @test11_1(<4 x ptr> [[PTRS]]) @@ -164,7 +164,7 @@ define <4 x i32> @test11_2(<4 x ptr> %ptrs) { declare <4 x i32> @test12_1(<4 x ptr>) argmemonly nounwind define <4 x i32> @test12_2(<4 x ptr> %ptrs) { -; CHECK: Function Attrs: argmemonly nounwind +; CHECK: Function Attrs: nounwind memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test12_2 ; CHECK-SAME: (<4 x ptr> [[PTRS:%.*]]) #[[ATTR12:[0-9]+]] { ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x ptr> [[PTRS]]) @@ -175,7 +175,7 @@ define <4 x i32> @test12_2(<4 x ptr> %ptrs) { } define i32 @volatile_load(ptr %p) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@volatile_load ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR13:[0-9]+]] { ; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, ptr [[P]], align 4 @@ -246,7 +246,7 @@ define void @fptr_test1b(ptr %p, ptr %f) { } define void @fptr_test1c(ptr %p, ptr %f) { -; CHECK: Function Attrs: nofree readonly +; CHECK: Function Attrs: nofree memory(read) ; CHECK-LABEL: define {{[^@]+}}@fptr_test1c ; CHECK-SAME: (ptr readnone [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: call void [[F]](ptr readnone [[P]]) #[[ATTR2:[0-9]+]] @@ -278,7 +278,7 @@ define void @fptr_test2b(ptr %p, ptr %f) { } define void @fptr_test2c(ptr %p, ptr %f) { -; CHECK: Function 
Attrs: nofree readonly +; CHECK: Function Attrs: nofree memory(read) ; CHECK-LABEL: define {{[^@]+}}@fptr_test2c ; CHECK-SAME: (ptr readonly [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: call void [[F]](ptr readonly [[P]]) #[[ATTR2]] @@ -289,7 +289,7 @@ define void @fptr_test2c(ptr %p, ptr %f) { } define void @alloca_recphi() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: define {{[^@]+}}@alloca_recphi ; CHECK-SAME: () #[[ATTR14:[0-9]+]] { ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/FunctionAttrs/stats.ll b/llvm/test/Transforms/FunctionAttrs/stats.ll index 2f36939846d20..5f007b4078ff3 100644 --- a/llvm/test/Transforms/FunctionAttrs/stats.ll +++ b/llvm/test/Transforms/FunctionAttrs/stats.ll @@ -16,13 +16,11 @@ entry: ret void } -; CHECK: 1 function-attrs - Number of functions marked argmemonly +; CHECK: 2 function-attrs - Number of functions with improved memory attribute ; CHECK-NEXT: 1 function-attrs - Number of arguments marked nocapture ; CHECK-NEXT: 1 function-attrs - Number of functions marked as nofree ; CHECK-NEXT: 2 function-attrs - Number of functions marked as norecurse ; CHECK-NEXT: 2 function-attrs - Number of functions marked as nosync ; CHECK-NEXT: 2 function-attrs - Number of functions marked as nounwind -; CHECK-NEXT: 1 function-attrs - Number of functions marked readonly ; CHECK-NEXT: 1 function-attrs - Number of arguments marked readonly ; CHECK-NEXT: 2 function-attrs - Number of functions marked as willreturn -; CHECK-NEXT: 1 function-attrs - Number of functions marked writeonly diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll b/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll index 304c415951c64..ecc9a249046ab 100644 --- a/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll +++ b/llvm/test/Transforms/FunctionAttrs/willreturn-callsites.ll @@ -38,7 +38,7 @@ define 
void @test_fn_willreturn(ptr %ptr) willreturn { } define void @test_fn_mustprogress_readonly_calls(ptr %ptr) mustprogress { -; CHECK: Function Attrs: mustprogress nofree readonly willreturn +; CHECK: Function Attrs: mustprogress nofree willreturn memory(read) ; CHECK-LABEL: @test_fn_mustprogress_readonly_calls( ; CHECK-NOT: call void @decl_readonly() # ; CHECK-NOT: call void @decl_readnone() # diff --git a/llvm/test/Transforms/FunctionAttrs/willreturn.ll b/llvm/test/Transforms/FunctionAttrs/willreturn.ll index 3413b96556195..1c422ea476d26 100644 --- a/llvm/test/Transforms/FunctionAttrs/willreturn.ll +++ b/llvm/test/Transforms/FunctionAttrs/willreturn.ll @@ -2,7 +2,7 @@ ; RUN: opt -function-attrs -S %s | FileCheck %s define void @mustprogress_readnone() mustprogress { -; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind willreturn memory(none) ; CHECK-LABEL: @mustprogress_readnone( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] @@ -17,7 +17,7 @@ while.body: } define i32 @mustprogress_load(ptr %ptr) mustprogress { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse noreturn nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @mustprogress_load( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] @@ -34,7 +34,7 @@ while.body: } define void @mustprogress_store(ptr %ptr) mustprogress { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse noreturn nosync nounwind writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind memory(argmem: write) ; CHECK-LABEL: @mustprogress_store( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] @@ -63,7 +63,7 @@ define void @mustprogress_call_unknown_fn() mustprogress { } define i32 
@mustprogress_call_known_functions(ptr %ptr) mustprogress { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse noreturn nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse noreturn nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: @mustprogress_call_known_functions( ; CHECK-NEXT: call void @mustprogress_readnone() ; CHECK-NEXT: [[R:%.*]] = call i32 @mustprogress_load(ptr [[PTR:%.*]]) @@ -77,7 +77,7 @@ define i32 @mustprogress_call_known_functions(ptr %ptr) mustprogress { declare i32 @__gxx_personality_v0(...) define i64 @mustprogress_mayunwind() mustprogress personality ptr @__gxx_personality_v0 { -; CHECK: Function Attrs: mustprogress nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none) ; CHECK-LABEL: @mustprogress_mayunwind( ; CHECK-NEXT: [[A:%.*]] = invoke i64 @fn_noread() ; CHECK-NEXT: to label [[A:%.*]] unwind label [[B:%.*]] @@ -141,7 +141,7 @@ define void @willreturn_non_returning_function(i1 %c, ptr %p) { ; Infinite loop without mustprogress, will not return. define void @willreturn_loop() { -; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CHECK-LABEL: @willreturn_loop( ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -156,7 +156,7 @@ loop: ; Finite loop. Could be willreturn but not detected. ; FIXME define void @willreturn_finite_loop() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none) ; CHECK-LABEL: @willreturn_finite_loop( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -183,7 +183,7 @@ end: ; Infinite recursion without mustprogress, will not return. 
define void @willreturn_recursion() { -; CHECK: Function Attrs: nofree nosync nounwind readnone +; CHECK: Function Attrs: nofree nosync nounwind memory(none) ; CHECK-LABEL: @willreturn_recursion( ; CHECK-NEXT: tail call void @willreturn_recursion() ; CHECK-NEXT: ret void @@ -194,7 +194,7 @@ define void @willreturn_recursion() { ; Irreducible infinite loop, will not return. define void @willreturn_irreducible(i1 %c) { -; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind readnone +; CHECK: Function Attrs: nofree norecurse noreturn nosync nounwind memory(none) ; CHECK-LABEL: @willreturn_irreducible( ; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: diff --git a/llvm/test/Transforms/FunctionAttrs/writeonly.ll b/llvm/test/Transforms/FunctionAttrs/writeonly.ll index 40ca265990487..0c8ec05223b36 100644 --- a/llvm/test/Transforms/FunctionAttrs/writeonly.ll +++ b/llvm/test/Transforms/FunctionAttrs/writeonly.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -passes=function-attrs -S | FileCheck %s define void @nouses-argworn-funrn(ptr writeonly %.aaa) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define {{[^@]+}}@nouses-argworn-funrn ; CHECK-SAME: (ptr nocapture readnone [[DOTAAA:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: nouses-argworn-funrn_entry: @@ -13,7 +13,7 @@ nouses-argworn-funrn_entry: } define void @nouses-argworn-funro(ptr writeonly %.aaa, ptr %.bbb) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind readonly willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) ; CHECK-LABEL: define {{[^@]+}}@nouses-argworn-funro ; CHECK-SAME: (ptr nocapture readnone [[DOTAAA:%.*]], ptr nocapture readonly [[DOTBBB:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: nouses-argworn-funro_entry: @@ -30,7 +30,7 @@ 
nouses-argworn-funro_entry: @d-ccc = internal global %_type_of_d-ccc <{ ptr null, i8 1, i8 13, i8 0, i8 -127 }>, align 8 define void @nouses-argworn-funwo(ptr writeonly %.aaa) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write, argmem: none, inaccessiblemem: none) ; CHECK-LABEL: define {{[^@]+}}@nouses-argworn-funwo ; CHECK-SAME: (ptr nocapture readnone [[DOTAAA:%.*]]) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: nouses-argworn-funwo_entry: @@ -43,7 +43,7 @@ nouses-argworn-funwo_entry: } define void @test_store(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test_store ; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR3:[0-9]+]] { ; CHECK-NEXT: store i8 0, ptr [[P]], align 1 @@ -55,7 +55,7 @@ define void @test_store(ptr %p) { @G = external global ptr define i8 @test_store_capture(ptr %p) { -; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: read, inaccessiblemem: none) ; CHECK-LABEL: define {{[^@]+}}@test_store_capture ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: store ptr [[P]], ptr @G, align 8 @@ -70,7 +70,7 @@ define i8 @test_store_capture(ptr %p) { } define void @test_addressing(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn writeonly +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@test_addressing ; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR3]] { ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P]], i64 8 @@ -83,7 
+83,7 @@ define void @test_addressing(ptr %p) { } define void @test_readwrite(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nosync nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test_readwrite ; CHECK-SAME: (ptr nocapture [[P:%.*]]) #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[P]], align 1 @@ -96,7 +96,7 @@ define void @test_readwrite(ptr %p) { } define void @test_volatile(ptr %p) { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nofree norecurse nounwind +; CHECK: Function Attrs: nofree norecurse nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test_volatile ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: store volatile i8 0, ptr [[P]], align 1 @@ -107,7 +107,7 @@ define void @test_volatile(ptr %p) { } define void @test_atomicrmw(ptr %p) { -; CHECK: Function Attrs: argmemonly mustprogress nofree norecurse nounwind willreturn +; CHECK: Function Attrs: mustprogress nofree norecurse nounwind willreturn memory(argmem: readwrite) ; CHECK-LABEL: define {{[^@]+}}@test_atomicrmw ; CHECK-SAME: (ptr nocapture [[P:%.*]]) #[[ATTR7:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[P]], i8 0 seq_cst, align 1 @@ -134,7 +134,7 @@ declare void @direct2_callee(ptr %p) writeonly ; writeonly w/o nocapture is not enough define void @direct2(ptr %p) { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: define {{[^@]+}}@direct2 ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR8:[0-9]+]] { ; CHECK-NEXT: call void @direct2_callee(ptr [[P]]) @@ -146,9 +146,9 @@ define void @direct2(ptr %p) { } define void @direct2b(ptr %p) { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: define {{[^@]+}}@direct2b -; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]]) #[[ATTR8]] { +; CHECK-SAME: (ptr 
nocapture [[P:%.*]]) #[[ATTR8]] { ; CHECK-NEXT: call void @direct2_callee(ptr nocapture [[P]]) ; CHECK-NEXT: ret void ; @@ -209,9 +209,9 @@ define void @fptr_test2(ptr %p, ptr %f) { } define void @fptr_test3(ptr %p, ptr %f) { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: define {{[^@]+}}@fptr_test3 -; CHECK-SAME: (ptr nocapture writeonly [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR8]] { +; CHECK-SAME: (ptr nocapture [[P:%.*]], ptr nocapture readonly [[F:%.*]]) #[[ATTR8]] { ; CHECK-NEXT: call void [[F]](ptr nocapture [[P]]) #[[ATTR8]] ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-across-backedge.ll b/llvm/test/Transforms/GVN/PRE/load-pre-across-backedge.ll new file mode 100644 index 0000000000000..fe7ceed54fc58 --- /dev/null +++ b/llvm/test/Transforms/GVN/PRE/load-pre-across-backedge.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -gvn -S < %s | FileCheck %s + +; Check that PRE-LOAD across backedge does not +; result in invalid dominator tree. 
+declare void @use(i32) + +define void @test1(i1 %c, i32 %arg) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[DOTBB2_CRIT_EDGE:%.*]] +; CHECK: .bb2_crit_edge: +; CHECK-NEXT: [[DOTPRE:%.*]] = shl i32 [[ARG:%.*]], 2 +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[SHL1:%.*]] = shl i32 [[ARG]], 2 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[SHL2_PRE_PHI:%.*]] = phi i32 [ [[DOTPRE]], [[DOTBB2_CRIT_EDGE]] ], [ [[SHL3:%.*]], [[BB3]] ] +; CHECK-NEXT: call void @use(i32 [[SHL2_PRE_PHI]]) +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[SHL3]] = shl i32 [[ARG]], 2 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr null, i32 [[SHL3]] +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: call void @use(i32 [[V]]) +; CHECK-NEXT: br label [[BB2]] +; + br i1 %c, label %bb1, label %bb2 + +bb1: + %shl1 = shl i32 %arg, 2 + br label %bb3 + +bb2: + %shl2 = shl i32 %arg, 2 + call void @use(i32 %shl2) + br label %bb3 + +bb3: + %shl3 = shl i32 %arg, 2 + %gep = getelementptr i32, ptr null, i32 %shl3 + %v = load i32, ptr %gep, align 4 + call void @use(i32 %v) + br label %bb2 +} diff --git a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll index ed3ac43b4beca..e40e6ffc0aa3b 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll +++ b/llvm/test/Transforms/GVN/PRE/pre-gep-load.ll @@ -95,14 +95,13 @@ define void @test_shortcut_safe(i1 %tst, i32 %p1, i32* %a) { ; CHECK-LABEL: @test_shortcut_safe( ; CHECK-NEXT: br i1 [[TST:%.*]], label [[SEXT1:%.*]], label [[PRE_DEST:%.*]] ; CHECK: pre.dest: -; CHECK-NEXT: [[DOTPRE:%.*]] = sext i32 [[P1:%.*]] to i64 ; CHECK-NEXT: br label [[SEXT_USE:%.*]] ; CHECK: sext1: -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[P1]] to i64 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[P1:%.*]] to i64 ; CHECK-NEXT: br label [[SEXT_USE]] ; CHECK: sext.use: -; CHECK-NEXT: [[IDXPROM2_PRE_PHI:%.*]] = phi i64 [ [[IDXPROM]], 
[[SEXT1]] ], [ [[DOTPRE]], [[PRE_DEST]] ] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM2_PRE_PHI]] +; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[P1]] to i64 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[IDXPROM2]] ; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4 ; CHECK-NEXT: tail call void @g(i32 [[VAL]]) ; CHECK-NEXT: br label [[PRE_DEST]] diff --git a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll index 8923fec97a63d..526076cd9a85b 100644 --- a/llvm/test/Transforms/GlobalOpt/ctor-memset.ll +++ b/llvm/test/Transforms/GlobalOpt/ctor-memset.ll @@ -115,5 +115,5 @@ define internal void @ctor8() { declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. diff --git a/llvm/test/Transforms/GlobalOpt/pr54572.ll b/llvm/test/Transforms/GlobalOpt/pr54572.ll index e4f3264b5871e..83640452ed1c4 100644 --- a/llvm/test/Transforms/GlobalOpt/pr54572.ll +++ b/llvm/test/Transforms/GlobalOpt/pr54572.ll @@ -19,5 +19,5 @@ define void @test() { ret void } ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. 
diff --git a/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll b/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll index fb748ae4dc494..856fc37620499 100644 --- a/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll +++ b/llvm/test/Transforms/IndVarSimplify/invalidate-modified-lcssa-phi.ll @@ -156,3 +156,55 @@ exit: %lcssa = phi i16 [ %sum.next, %loop ] ret i16 0 } + +define i32 @pr58750(i16 %a, ptr %dst, i1 %c.0) { +; CHECK-LABEL: @pr58750( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP186_NOT:%.*]] = icmp eq i16 [[A:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP186_NOT]]) +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[P_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LCSSA:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[P_0]], 0 +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: store i16 0, ptr [[DST:%.*]], align 1 +; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_LATCH]] +; CHECK: outer.latch: +; CHECK-NEXT: [[LCSSA]] = phi i32 [ [[XOR]], [[INNER]] ] +; CHECK-NEXT: br i1 [[C_0:%.*]], label [[OUTER_HEADER]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[LCSSA_LCSSA:%.*]] = phi i32 [ [[LCSSA]], [[OUTER_LATCH]] ] +; CHECK-NEXT: ret i32 [[LCSSA_LCSSA]] +; +entry: + %cmp186.not = icmp eq i16 %a, 0 + call void @llvm.assume(i1 %cmp186.not) + br label %outer.header + +outer.header: + %p.0 = phi i32 [ 0, %entry ], [ %lcssa, %outer.latch ] + br label %inner + +inner: + %inner.iv = phi i16 [ 0, %outer.header ], [ %inner.iv.next, %inner ] + %p.1 = phi i32 [ %p.0, %outer.header ], [ %xor, %inner ] + store i16 %inner.iv, ptr %dst, align 1 + %conv = sext i16 %inner.iv to i32 + %xor = xor i32 %p.1, %conv + %inner.iv.next = add nuw i16 %inner.iv, 1 + %c.1 = icmp ult i16 %inner.iv.next, %a + br i1 %c.1, label %inner, label %outer.latch + +outer.latch: + %lcssa = phi i32 [ %xor, %inner ] + br i1 %c.0, label %outer.header, label %exit 
+ +exit: + ret i32 %lcssa +} + +; Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn +declare void @llvm.assume(i1 noundef) #1 + + diff --git a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll index 0dac4f3eb13a0..5cecdac7d99f4 100644 --- a/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll +++ b/llvm/test/Transforms/IndVarSimplify/scev-invalidation.ll @@ -70,3 +70,57 @@ exit: %res = add i32 %c.ext, %or ret i32 %res } + +define i8 @l(i32 %inc, i1 %tobool.not.i) { +; CHECK-LABEL: @l( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[C_05_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[INNER]] ], [ 0, [[OUTER_HEADER]] ] +; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[C_05_I]], 1 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[C_05_I]], 0 +; CHECK-NEXT: [[OR_COND_I:%.*]] = select i1 [[CMP_I]], i1 true, i1 [[TOBOOL_NOT_I:%.*]] +; CHECK-NEXT: br i1 [[OR_COND_I]], label [[OUTER_LATCH:%.*]], label [[INNER]] +; CHECK: outer.latch: +; CHECK-NEXT: [[C_05_I_LCSSA:%.*]] = phi i32 [ [[C_05_I]], [[INNER]] ] +; CHECK-NEXT: [[LCSSA:%.*]] = phi i32 [ 0, [[INNER]] ] +; CHECK-NEXT: [[AND:%.*]] = and i32 1, [[INC:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[AND]] to i8 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C_05_I_LCSSA]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[OUTER_HEADER]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: ret i8 0 +; +entry: + br label %outer.header + +outer.header: ; preds = %h.exit, %entry + %outer.iv = phi i16 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + %and = and i32 1, %inc + %conv = sext i16 %outer.iv to i32 + br label %inner + +inner: ; preds = %while.body.i, %for.cond + %c.05.i = phi i32 [ %inc.i, %inner ], [ 0, 
%outer.header ] + %i.addr.04.i = phi i32 [ 0, %inner ], [ %conv, %outer.header ] + %inc.i = add nsw i32 %c.05.i, 1 + %cmp.i = icmp sgt i32 %c.05.i, 0 + %or.cond.i = select i1 %cmp.i, i1 true, i1 %tobool.not.i + br i1 %or.cond.i, label %outer.latch, label %inner + +outer.latch: ; preds = %while.body.i + %lcssa = phi i32 [ 0, %inner ] + %0 = trunc i32 %and to i8 + %1 = trunc i32 %c.05.i to i8 + %2 = sub i8 %0, %1 + %tobool.not = icmp eq i8 %2, 0 + %outer.iv.next = add i16 %outer.iv, 1 + br i1 %tobool.not, label %outer.header, label %if.then + +if.then: ; preds = %h.exit + ret i8 0 +} diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 5e795b9baf07a..e5c61c2f3e479 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -1081,25 +1081,25 @@ declare void @memset_pattern8(i8*, i8*, i64) declare void @memset_pattern16(i8*, i8*, i64) ; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN]] = { mustprogress nofree nounwind willreturn } -; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] = { mustprogress nofree nounwind willreturn writeonly } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] = { mustprogress nofree nounwind willreturn memory(write) } ; CHECK-DAG: attributes [[NOFREE_NOUNWIND]] = { nofree nounwind } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc" } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCZEROED_ALLOCSIZE01_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" } -; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress 
nofree nounwind readonly willreturn } -; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { argmemonly mustprogress nofree nounwind willreturn } -; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY]] = { nofree nounwind readonly } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_FREE_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE1_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized,aligned") allocsize(1) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCZEROED_ALLOCSIZE01_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind willreturn memory(read) } +; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_WILLRETURN]] = { mustprogress nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_READONLY]] = { nofree nounwind memory(read) } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_FREE_FAMILY_MALLOC]] = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" } ; CHECK-DAG: attributes [[NOFREE_WILLRETURN]] = { mustprogress nofree willreturn } -; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" } -; CHECK-DAG: attributes 
[[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { argmemonly mustprogress nofree nounwind readonly willreturn } +; CHECK-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCKIND_ALLOCUNINIT_ALLOCSIZE0_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND_READONLY_WILLRETURN]] = { mustprogress nofree nounwind willreturn memory(argmem: read) } ; CHECK-DAG: attributes [[NOFREE]] = { nofree } -; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND]] = { argmemonly nofree nounwind } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE1_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("realloc") allocsize(1) "alloc-family"="malloc" } -; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nofree nounwind willreturn "alloc-family"="malloc" } +; CHECK-DAG: attributes [[ARGMEMONLY_NOFREE_NOUNWIND]] = { nofree nounwind memory(argmem: readwrite) } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCKIND_REALLOC_ALLOCSIZE1_FAMILY_MALLOC]] = { mustprogress nounwind willreturn allockind("realloc") allocsize(1) memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" } +; CHECK-DAG: attributes [[INACCESSIBLEMEMORARGONLY_NOFREE_NOUNWIND_WILLRETURN_FAMILY_MALLOC]] = { mustprogress nofree nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc" } -; CHECK-NVPTX-DAG: attributes [[NOFREE_NOUNWIND_READNONE]] = { nofree nosync nounwind readnone } +; CHECK-NVPTX-DAG: attributes [[NOFREE_NOUNWIND_READNONE]] = { nofree nosync nounwind memory(none) } -; CHECK-AIX-DAG: attributes 
[[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) "alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_VEC_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_VEC_MALLOC]] = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("realloc") allocsize(1) "alloc-family"="vec_malloc" } -; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_VEC_MALLOC]] = { inaccessiblememonly mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE0_FAMILY_VEC_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_FAMILY_VEC_MALLOC]] = { mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMORARGMEMONLY_NOUNWIND_WILLRETURN_ALLOCSIZE_FAMILY_VEC_MALLOC]] = { mustprogress nounwind willreturn allockind("realloc") allocsize(1) memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="vec_malloc" } +; CHECK-AIX-DAG: attributes [[INACCESSIBLEMEMONLY_NOFREE_NOUNWIND_WILLRETURN_ALLOCSIZE01_FAMILY_VEC_MALLOC]] = { mustprogress nofree nounwind willreturn allockind("alloc,zeroed") allocsize(0,1) memory(inaccessiblemem: readwrite) "alloc-family"="vec_malloc" } diff --git 
a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll index 9b90f59d3602b..12a59ea02cc88 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll @@ -52,5 +52,5 @@ attributes #1 = { nounwind readnone speculatable } !28 = !DILocation(line: 9, column: 18, scope: !2) !29 = !DILocation(line: 10, column: 1, scope: !2) -; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn } +; CHECK: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: write, inaccessiblemem: none) } ; CHECK-NOT: foo.coefficient1 diff --git a/llvm/test/Transforms/InferFunctionAttrs/readonly_and_writeonly.ll b/llvm/test/Transforms/InferFunctionAttrs/readonly_and_writeonly.ll index bf16bc80242a7..c1a5f11cf94ab 100644 --- a/llvm/test/Transforms/InferFunctionAttrs/readonly_and_writeonly.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/readonly_and_writeonly.ll @@ -5,4 +5,4 @@ ; CHECK: declare double @acos(double) [[NOFREE_NOUNWIND_WILLRETURN_READNONE:#[0-9]+]] declare double @acos(double) readonly -; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_READNONE]] = { mustprogress nofree nosync nounwind readnone willreturn } +; CHECK-DAG: attributes [[NOFREE_NOUNWIND_WILLRETURN_READNONE]] = { mustprogress nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/Transforms/Inline/cgscc-update.ll b/llvm/test/Transforms/Inline/cgscc-update.ll index 5b39d5914f564..b5c30360548c1 100644 --- a/llvm/test/Transforms/Inline/cgscc-update.ll +++ b/llvm/test/Transforms/Inline/cgscc-update.ll @@ -9,8 +9,8 @@ ; CHECK: declare void @unknown() declare void @unknown() -; Basic correctness check: this should get annotated as readnone. -; CHECK: Function Attrs: nounwind readnone +; Basic correctness check: this should get annotated as memory(none). 
+; CHECK: Function Attrs: nounwind memory(none) ; CHECK-NEXT: declare void @readnone() declare void @readnone() readnone nounwind @@ -26,8 +26,8 @@ entry: ret void } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test1_g() define void @test1_g() noinline { entry: @@ -35,8 +35,8 @@ entry: ret void } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test1_h() define void @test1_h() noinline { entry: @@ -58,8 +58,8 @@ entry: ret void()* @test2_h } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test2_g() define void @test2_g() noinline { entry: @@ -68,8 +68,8 @@ entry: ret void } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test2_h() define void @test2_h() noinline { entry: @@ -151,8 +151,8 @@ exit: ; interesting call graph update for the new call edge. Eventually, we still ; form a new SCC and should use that can deduce precise function attrs. -; This function should have had 'readnone' deduced for its SCC. 
-; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test4_f1() define void @test4_f1() noinline { entry: @@ -174,8 +174,8 @@ entry: ret void } -; This function should have had 'readnone' deduced for its SCC. -; CHECK: Function Attrs: nofree noinline nosync nounwind readnone +; This function should have had 'memory(none)' deduced for its SCC. +; CHECK: Function Attrs: nofree noinline nosync nounwind memory(none) ; CHECK-NEXT: define void @test4_h() define void @test4_h() noinline { entry: diff --git a/llvm/test/Transforms/Inline/inline_invoke.ll b/llvm/test/Transforms/Inline/inline_invoke.ll index 59df19dc8a0d5..8dfb170b14804 100644 --- a/llvm/test/Transforms/Inline/inline_invoke.ll +++ b/llvm/test/Transforms/Inline/inline_invoke.ll @@ -343,7 +343,7 @@ terminate: ; CHECK-NEXT: call void @_ZSt9terminatev() ; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #1 = { nounwind readnone } +; CHECK: attributes #1 = { nounwind memory(none) } ; CHECK: attributes #2 = { ssp uwtable } -; CHECK: attributes #3 = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #4 = { noreturn nounwind } diff --git a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll index 2a36dadaba927..56491c5efa4f4 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll @@ -65,6 +65,6 @@ entry: declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone -; CHECK: attributes #0 = { 
nounwind readnone ssp } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #0 = { nounwind ssp memory(none) } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll index c29e0e0a38f65..c6ef477a4341d 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll @@ -32,29 +32,6 @@ define i1 @ptest_any2( %a) #0 { ret i1 %out } -; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)). -define i1 @ptest_any_brkb_z( %pg, %a) { -; CHECK-LABEL: @ptest_any_brkb_z( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( [[PG:%.*]], [[A:%.*]]) -; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) -; CHECK-NEXT: ret i1 [[OUT]] -; - %1 = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( %pg, %a) - %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) - ret i1 %out -} - -define i1 @ptest_any_rdffr_z( %pg) { -; CHECK-LABEL: @ptest_any_rdffr_z( -; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.rdffr.z( [[PG:%.*]]) -; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) -; CHECK-NEXT: ret i1 [[OUT]] -; - %1 = tail call @llvm.aarch64.sve.rdffr.z( %pg) - %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) - ret i1 %out -} - define i1 @ptest_first( %a) #0 { ; CHECK-LABEL: @ptest_first( ; CHECK-NEXT: [[MASK:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv4i1(i32 0) @@ -91,6 +68,140 @@ define i1 @ptest_last( %a) #0 { ret i1 %out } +; Rewrite PTEST_ANY(X=OP(PG,...), X) -> PTEST_ANY(PG, X)). 
+ +define i1 @ptest_any_brka_z( %pg, %a) { +; CHECK-LABEL: @ptest_any_brka_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brka.z.nxv16i1( [[PG:%.*]], [[A:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.brka.z.nxv16i1( %pg, %a) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_brkpa_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_brkpa_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkpa.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.brkpa.z.nxv16i1( %pg, %a, %b) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_brkb_z( %pg, %a) { +; CHECK-LABEL: @ptest_any_brkb_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( [[PG:%.*]], [[A:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.brkb.z.nxv16i1( %pg, %a) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_brkpb_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_brkpb_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.brkpb.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.brkpb.z.nxv16i1( %pg, %a, %b) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_rdffr_z( %pg) { +; CHECK-LABEL: @ptest_any_rdffr_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.rdffr.z( [[PG:%.*]]) +; CHECK-NEXT: [[OUT:%.*]] = call i1 
@llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[OUT]] +; + %1 = tail call @llvm.aarch64.sve.rdffr.z( %pg) + %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %out +} + +define i1 @ptest_any_and_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_and_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.and.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.and.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_bic_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_bic_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.bic.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.bic.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_eor_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_eor_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.eor.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.eor.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_nand_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_nand_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.nand.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.nand.z.nxv16i1( %pg, %a, %b) + %2 = call i1 
@llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_nor_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_nor_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.nor.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.nor.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_orn_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_orn_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orn.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.orn.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + +define i1 @ptest_any_orr_z( %pg, %a, %b) { +; CHECK-LABEL: @ptest_any_orr_z( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.orr.z.nxv16i1( [[PG:%.*]], [[A:%.*]], [[B:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( [[PG]], [[TMP1]]) +; CHECK-NEXT: ret i1 [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.orr.z.nxv16i1( %pg, %a, %b) + %2 = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %1) + ret i1 %2 +} + declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) declare @llvm.aarch64.sve.ptrue.nxv8i1(i32) declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) @@ -104,7 +215,18 @@ declare @llvm.aarch64.sve.convert.to.svbool.nxv8i1( @llvm.aarch64.sve.convert.to.svbool.nxv4i1() declare @llvm.aarch64.sve.convert.to.svbool.nxv2i1() +declare @llvm.aarch64.sve.brka.z.nxv16i1(, ) declare @llvm.aarch64.sve.brkb.z.nxv16i1(, ) +declare @llvm.aarch64.sve.brkpa.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.brkpb.z.nxv16i1(, , ) declare @llvm.aarch64.sve.rdffr.z() +declare @llvm.aarch64.sve.and.z.nxv16i1(, , ) 
+declare @llvm.aarch64.sve.bic.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.eor.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.nand.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.nor.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.orn.z.nxv16i1(, , ) +declare @llvm.aarch64.sve.orr.z.nxv16i1(, , ) + attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll b/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll new file mode 100644 index 0000000000000..ef9b6f7ce4396 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fcmp-denormals-are-zero.ll @@ -0,0 +1,354 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine %s | FileCheck %s + +@var = external global i32, align 4 + +; fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 +; https://alive2.llvm.org/ce/z/fib8cf +define void @denormal_input_preserve_sign_fcmp_olt_smallest_normalized(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_olt_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oeq float [[F32:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oeq double [[F64:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oeq half [[F16:%.*]], 0xH0000 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF32_FLAGS:%.*]] = fcmp oeq float [[F32]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF32_FLAGS]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp olt float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp olt double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) 
+ %cmpf16 = fcmp olt half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + + %f32.fabs.flags = call nsz nnan float @llvm.fabs.f32(float %f32) + %cmpf32.flags = fcmp olt float %f32.fabs.flags, 0x3810000000000000 + store volatile i1 %cmpf32.flags, ptr @var + + ret void +} + +; fcmp uge fabs(x), smallest_normalized_number -> fcmp une x, 0.0 +; https://alive2.llvm.org/ce/z/xmqBXx +define void @denormal_input_preserve_sign_fcmp_uge_smallest_normalized(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_uge_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp une float [[F32:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp une double [[F64:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp une half [[F16:%.*]], 0xH0000 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp uge float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp uge double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp uge half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +; fcmp oge fabs(x), smallest_normalized_number -> fcmp one x, 0.0 +; https://alive2.llvm.org/ce/z/ZucNzF +define void @denormal_input_preserve_sign_fcmp_oge_smallest_normalized(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_oge_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp one float [[F32:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp one double [[F64:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 
[[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp one half [[F16:%.*]], 0xH0000 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp oge float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp oge double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp oge half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +; fcmp ult fabs(x), smallest_normalized_number -> fcmp ueq x, 0.0 +; https://alive2.llvm.org/ce/z/csAhZ2 +define void @denormal_input_preserve_sign_fcmp_ult_smallest_normalized(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_fcmp_ult_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ueq float [[F32:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ueq double [[F64:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ueq half [[F16:%.*]], 0xH0000 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp ult float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp ult double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp ult half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +define void @denormal_input_preserve_sign_vector_fcmp_olt_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { +; CHECK-LABEL: 
@denormal_input_preserve_sign_vector_fcmp_olt_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oeq <2 x float> [[F32:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oeq <2 x double> [[F64:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oeq <2 x half> [[F16:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %f32) + %cmpf32 = fcmp olt <2 x float> %f32.fabs, + store volatile <2 x i1> %cmpf32, ptr @var + + %f64.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %f64) + %cmpf64 = fcmp olt <2 x double> %f64.fabs, + store volatile <2 x i1> %cmpf64, ptr @var + + %f16.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %f16) + %cmpf16 = fcmp olt <2 x half> %f16.fabs, + store volatile <2 x i1> %cmpf16, ptr @var + ret void +} + +define void @denormal_input_preserve_sign_vector_fcmp_uge_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_uge_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp une <2 x float> [[F32:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp une <2 x double> [[F64:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp une <2 x half> [[F16:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %f32) + %cmpf32 = fcmp uge <2 x float> %f32.fabs, + store volatile <2 x i1> %cmpf32, ptr @var + + %f64.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %f64) + %cmpf64 = fcmp uge <2 x double> %f64.fabs, 
+ store volatile <2 x i1> %cmpf64, ptr @var + + %f16.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %f16) + %cmpf16 = fcmp uge <2 x half> %f16.fabs, + store volatile <2 x i1> %cmpf16, ptr @var + ret void +} + +define void @denormal_input_preserve_sign_vector_fcmp_oge_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_oge_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp one <2 x float> [[F32:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp one <2 x double> [[F64:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp one <2 x half> [[F16:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %f32) + %cmpf32 = fcmp oge <2 x float> %f32.fabs, + store volatile <2 x i1> %cmpf32, ptr @var + + %f64.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %f64) + %cmpf64 = fcmp oge <2 x double> %f64.fabs, + store volatile <2 x i1> %cmpf64, ptr @var + + %f16.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %f16) + %cmpf16 = fcmp oge <2 x half> %f16.fabs, + store volatile <2 x i1> %cmpf16, ptr @var + ret void +} + +define void @denormal_input_preserve_sign_vector_fcmp_ult_smallest_normalized(<2 x float> %f32, <2 x double> %f64, <2 x half> %f16) #0 { +; CHECK-LABEL: @denormal_input_preserve_sign_vector_fcmp_ult_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ueq <2 x float> [[F32:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ueq <2 x double> [[F64:%.*]], zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ueq <2 x half> [[F16:%.*]], 
zeroinitializer +; CHECK-NEXT: store volatile <2 x i1> [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %f32) + %cmpf32 = fcmp ult <2 x float> %f32.fabs, + store volatile <2 x i1> %cmpf32, ptr @var + + %f64.fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %f64) + %cmpf64 = fcmp ult <2 x double> %f64.fabs, + store volatile <2 x i1> %cmpf64, ptr @var + + %f16.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %f16) + %cmpf16 = fcmp ult <2 x half> %f16.fabs, + store volatile <2 x i1> %cmpf16, ptr @var + ret void +} + +; fcmp olt fabs(x), smallest_normalized_number -> fcmp oeq x, 0.0 +; https://alive2.llvm.org/ce/z/mpduXS +define void @denormal_input_positive_zero_fcmp_olt_smallest_normalized(float %f32, double %f64, half %f16) #1 { +; CHECK-LABEL: @denormal_input_positive_zero_fcmp_olt_smallest_normalized( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oeq float [[F32:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp oeq double [[F64:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp oeq half [[F16:%.*]], 0xH0000 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp olt float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp olt double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp olt half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +; Should not fold with IEEE inputs. 
+define void @denormal_input_ieee(float %f32, double %f64, half %f16) #2 { +; CHECK-LABEL: @denormal_input_ieee( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp olt float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp olt double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp olt half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +; Only f32 case should fold. 
+define void @denormal_input_preserve_sign_f32_only(float %f32, double %f64, half %f16) #3 { +; CHECK-LABEL: @denormal_input_preserve_sign_f32_only( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp oeq float [[F32:%.*]], 0.000000e+00 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp olt float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp olt double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp olt half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +define void @wrong_fcmp_type_ole(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @wrong_fcmp_type_ole( +; CHECK-NEXT: [[F32_FABS:%.*]] = call float @llvm.fabs.f32(float [[F32:%.*]]) +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp ole float [[F32_FABS]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[F64_FABS:%.*]] = call double @llvm.fabs.f64(double [[F64:%.*]]) +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp ole double [[F64_FABS]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[F16_FABS:%.*]] = call half @llvm.fabs.f16(half [[F16:%.*]]) +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp ole half [[F16_FABS]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %f32.fabs = 
call float @llvm.fabs.f32(float %f32) + %cmpf32 = fcmp ole float %f32.fabs, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %f64.fabs = call double @llvm.fabs.f64(double %f64) + %cmpf64 = fcmp ole double %f64.fabs, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %f16.fabs = call half @llvm.fabs.f16(half %f16) + %cmpf16 = fcmp ole half %f16.fabs, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +define void @missing_fabs(float %f32, double %f64, half %f16) #0 { +; CHECK-LABEL: @missing_fabs( +; CHECK-NEXT: [[CMPF32:%.*]] = fcmp olt float [[F32:%.*]], 0x3810000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF32]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF64:%.*]] = fcmp olt double [[F64:%.*]], 0x10000000000000 +; CHECK-NEXT: store volatile i1 [[CMPF64]], ptr @var, align 4 +; CHECK-NEXT: [[CMPF16:%.*]] = fcmp olt half [[F16:%.*]], 0xH0400 +; CHECK-NEXT: store volatile i1 [[CMPF16]], ptr @var, align 4 +; CHECK-NEXT: ret void +; + %cmpf32 = fcmp olt float %f32, 0x3810000000000000 + store volatile i1 %cmpf32, ptr @var + + %cmpf64 = fcmp olt double %f64, 0x10000000000000 + store volatile i1 %cmpf64, ptr @var + + %cmpf16 = fcmp olt half %f16, 0xH0400 + store volatile i1 %cmpf16, ptr @var + ret void +} + +declare float @llvm.fabs.f32(float) +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) + +declare half @llvm.fabs.f16(half) +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) + +declare double @llvm.fabs.f64(double) +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) + +attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" } +attributes #1 = { "denormal-fp-math"="ieee,positive-zero" } +attributes #2 = { "denormal-fp-math"="ieee,ieee" } +attributes #3 = { "denormal-fp-math-f32"="ieee,preserve-sign" } diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index d0c0b66b813ad..dd49f8bca91da 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@
-937,3 +937,58 @@ define <2 x half> @powi_recip(<2 x half> %x, i32 %y) { %r = fdiv reassoc arcp nnan ninf <2 x half> , %p ret <2 x half> %r } + +define float @fdiv_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv float %x, 0.0 + ret float %fdiv +} + +; https://alive2.llvm.org/ce/z/gLBFKB +define float @fdiv_nnan_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = call nnan float @llvm.copysign.f32(float 0x7FF0000000000000, float [[X:%.*]]) +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan float %x, 0.0 + ret float %fdiv +} + +define <2 x float> @fdiv_nnan_zero_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_zero_v2f32( +; CHECK-NEXT: [[FDIV:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan <2 x float> %x, zeroinitializer + ret <2 x float> %fdiv +} + +define float @fdiv_nnan_zero_f32_fmf(float %x) { +; CHECK-LABEL: @fdiv_nnan_zero_f32_fmf( +; CHECK-NEXT: [[FDIV:%.*]] = call nnan nsz float @llvm.copysign.f32(float 0x7FF0000000000000, float [[X:%.*]]) +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan nsz float %x, 0.0 + ret float %fdiv +} + +define <2 x float> @fdiv_nnan_zero_v2f32_fmf(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_zero_v2f32_fmf( +; CHECK-NEXT: [[FDIV:%.*]] = call nnan nsz <2 x float> @llvm.copysign.v2f32(<2 x float> , <2 x float> [[X:%.*]]) +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan nsz <2 x float> %x, zeroinitializer + ret <2 x float> %fdiv +} + +define float @fdiv_nnan_neg_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_neg_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan float [[X:%.*]], -0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan float %x, -0.0 + ret float %fdiv +} diff --git 
a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll index 767c370e2614a..6be5f6ed42d53 100644 --- a/llvm/test/Transforms/InstCombine/load-store-forward.ll +++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll @@ -104,8 +104,8 @@ define i32 @load_i32_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_i32_store_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -156,8 +156,8 @@ define float @load_f32_store_nxv4f32(ptr %a) { ; CHECK-LABEL: @load_f32_store_nxv4f32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, float 1.000000e+00, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[A]], align 4 -; CHECK-NEXT: ret float [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: ret float [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, float 1.0, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -182,8 +182,8 @@ define <4 x i32> @load_v4i32_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_v4i32_store_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[A]], align 16 -; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; CHECK-NEXT: ret <4 x i32> [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -195,8 +195,8 @@ 
define <4 x i16> @load_v4i16_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_v4i16_store_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[A]], align 16 -; CHECK-NEXT: ret <4 x i16> [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 16 +; CHECK-NEXT: ret <4 x i16> [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -224,8 +224,8 @@ define @load_nxv4i8_store_nxv4i32(ptr %a) { ; CHECK-LABEL: @load_nxv4i8_store_nxv4i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr [[A:%.*]], align 16 -; CHECK-NEXT: [[TMP2:%.*]] = load , ptr [[A]], align 16 -; CHECK-NEXT: ret [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[A]], align 16 +; CHECK-NEXT: ret [[TMP0]] ; entry: store shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), ptr %a, align 16 @@ -253,3 +253,224 @@ define i1 @load_i1_store_i8(ptr %a) { %v = load i1, ptr %a ret i1 %v } + +define i32 @load_after_memset_0(ptr %a) { +; CHECK-LABEL: @load_after_memset_0( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret i32 0 +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i32, ptr %a + ret i32 %v +} + +define float @load_after_memset_0_float(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_float( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret float 0.000000e+00 +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load float, ptr %a + ret float %v +} + +define i27 @load_after_memset_0_non_byte_sized(ptr %a) { +; CHECK-LABEL: 
@load_after_memset_0_non_byte_sized( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret i27 0 +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i27, ptr %a + ret i27 %v +} + +define i1 @load_after_memset_0_i1(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_i1( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret i1 false +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i1, ptr %a + ret i1 %v +} + +define <4 x i8> @load_after_memset_0_vec(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_vec( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret <4 x i8> zeroinitializer +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load <4 x i8>, ptr %a + ret <4 x i8> %v +} + +define i32 @load_after_memset_1(ptr %a) { +; CHECK-LABEL: @load_after_memset_1( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: ret i32 16843009 +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load i32, ptr %a + ret i32 %v +} + +define float @load_after_memset_1_float(ptr %a) { +; CHECK-LABEL: @load_after_memset_1_float( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: ret float 0x3820202020000000 +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load float, ptr %a + ret float %v +} + +define i27 @load_after_memset_1_non_byte_sized(ptr %a) { +; CHECK-LABEL: @load_after_memset_1_non_byte_sized( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 
false) +; CHECK-NEXT: ret i27 16843009 +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load i27, ptr %a + ret i27 %v +} + +define i1 @load_after_memset_1_i1(ptr %a) { +; CHECK-LABEL: @load_after_memset_1_i1( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: ret i1 true +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load i1, ptr %a + ret i1 %v +} + +define <4 x i8> @load_after_memset_1_vec(ptr %a) { +; CHECK-LABEL: @load_after_memset_1_vec( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false) +; CHECK-NEXT: ret <4 x i8> +; + call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false) + %v = load <4 x i8>, ptr %a + ret <4 x i8> %v +} + +define i32 @load_after_memset_unknown(ptr %a, i8 %byte) { +; CHECK-LABEL: @load_after_memset_unknown( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 [[BYTE:%.*]], i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 %byte, i64 16, i1 false) + %v = load i32, ptr %a + ret i32 %v +} + +; TODO: Handle load at offset. 
+define i32 @load_after_memset_0_offset(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_offset( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %gep = getelementptr i8, ptr %a, i64 4 + %v = load i32, ptr %gep + ret i32 %v +} + +define i32 @load_after_memset_0_offset_too_large(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_offset_too_large( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 13 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %gep = getelementptr i8, ptr %a, i64 13 + %v = load i32, ptr %gep + ret i32 %v +} + +define i32 @load_after_memset_0_offset_negative(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_offset_negative( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 -1 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %gep = getelementptr i8, ptr %a, i64 -1 + %v = load i32, ptr %gep + ret i32 %v +} + +define i32 @load_after_memset_0_clobber(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_clobber( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: store i8 1, ptr [[A]], align 1 +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void 
@llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + store i8 1, ptr %a + %v = load i32, ptr %a + ret i32 %v +} + +define i256 @load_after_memset_0_too_small(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_too_small( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i256, ptr [[A]], align 4 +; CHECK-NEXT: ret i256 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i256, ptr %a + ret i256 %v +} + +define i129 @load_after_memset_0_too_small_by_one_bit(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_too_small_by_one_bit( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i129, ptr [[A]], align 4 +; CHECK-NEXT: ret i129 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load i129, ptr %a + ret i129 %v +} + +define i32 @load_after_memset_0_unknown_length(ptr %a, i64 %len) { +; CHECK-LABEL: @load_after_memset_0_unknown_length( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[A:%.*]], i8 0, i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[A]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 %len, i1 false) + %v = load i32, ptr %a + ret i32 %v +} + +define i32 @load_after_memset_0_atomic(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_atomic( +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load atomic i32, ptr [[A]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load atomic i32, ptr %a seq_cst, align 4 + ret i32 %v +} + +define @load_after_memset_0_scalable(ptr %a) { +; CHECK-LABEL: @load_after_memset_0_scalable( +; CHECK-NEXT: call void 
@llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[V:%.*]] = load , ptr [[A]], align 4 +; CHECK-NEXT: ret [[V]] +; + call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false) + %v = load , ptr %a + ret %v +} + +declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 5f0e6ceac449d..4cb2468b08753 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -227,12 +227,14 @@ define i32 @shl1_increment_use(i32 %x, i32 %y) { ret i32 %m } +; ((-1 << x) ^ -1) * y --> (y << x) - y + define i8 @shl1_decrement(i8 %x, i8 %y) { ; CHECK-LABEL: @shl1_decrement( -; CHECK-NEXT: [[POW2X:%.*]] = shl i8 -1, [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor i8 [[POW2X]], -1 -; CHECK-NEXT: [[M:%.*]] = mul i8 [[X1]], [[Y:%.*]] -; CHECK-NEXT: ret i8 [[M]] +; CHECK-NEXT: [[Y_FR:%.*]] = freeze i8 [[Y:%.*]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl i8 [[Y_FR]], [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 [[MULSHL]], [[Y_FR]] +; CHECK-NEXT: ret i8 [[M1]] ; %pow2x = shl i8 -1, %x %x1 = xor i8 %pow2x, -1 @@ -243,10 +245,9 @@ define i8 @shl1_decrement(i8 %x, i8 %y) { define i8 @shl1_decrement_commute(i8 %x, i8 noundef %p) { ; CHECK-LABEL: @shl1_decrement_commute( ; CHECK-NEXT: [[Y:%.*]] = ashr i8 [[P:%.*]], 1 -; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i8 -1, [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor i8 [[NOTMASK]], -1 -; CHECK-NEXT: [[M:%.*]] = mul i8 [[Y]], [[X1]] -; CHECK-NEXT: ret i8 [[M]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl i8 [[Y]], [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 [[MULSHL]], [[Y]] +; CHECK-NEXT: ret i8 [[M1]] ; %y = ashr i8 %p, 1 ; thwart complexity-based canonicalization %pow2x = shl i8 1, %x @@ -257,10 +258,10 @@ define i8 @shl1_decrement_commute(i8 %x, i8 noundef %p) { define i8 @shl1_nuw_decrement(i8 %x, i8 %y) { ; CHECK-LABEL: @shl1_nuw_decrement( -; CHECK-NEXT: [[POW2X:%.*]] = 
shl i8 -1, [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor i8 [[POW2X]], -1 -; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[X1]], [[Y:%.*]] -; CHECK-NEXT: ret i8 [[M]] +; CHECK-NEXT: [[Y_FR:%.*]] = freeze i8 [[Y:%.*]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl i8 [[Y_FR]], [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 [[MULSHL]], [[Y_FR]] +; CHECK-NEXT: ret i8 [[M1]] ; %pow2x = shl i8 -1, %x %x1 = xor i8 %pow2x, -1 @@ -270,10 +271,10 @@ define i8 @shl1_nuw_decrement(i8 %x, i8 %y) { define i8 @shl1_nsw_decrement(i8 %x, i8 %y) { ; CHECK-LABEL: @shl1_nsw_decrement( -; CHECK-NEXT: [[POW2X:%.*]] = shl nsw i8 -1, [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor i8 [[POW2X]], -1 -; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X1]], [[Y:%.*]] -; CHECK-NEXT: ret i8 [[M]] +; CHECK-NEXT: [[Y_FR:%.*]] = freeze i8 [[Y:%.*]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl i8 [[Y_FR]], [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 [[MULSHL]], [[Y_FR]] +; CHECK-NEXT: ret i8 [[M1]] ; %pow2x = shl nsw i8 -1, %x %x1 = xor i8 %pow2x, -1 @@ -281,6 +282,8 @@ define i8 @shl1_nsw_decrement(i8 %x, i8 %y) { ret i8 %m } +; negative test - extra use would require more instructions + define i32 @shl1_decrement_use(i32 %x, i32 %y) { ; CHECK-LABEL: @shl1_decrement_use( ; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw i32 -1, [[X:%.*]] @@ -296,12 +299,13 @@ define i32 @shl1_decrement_use(i32 %x, i32 %y) { ret i32 %m } +; the fold works for vectors too and if 'y' is a constant, sub becomes add + define <2 x i8> @shl1_decrement_vec(<2 x i8> %x) { ; CHECK-LABEL: @shl1_decrement_vec( -; CHECK-NEXT: [[POW2X:%.*]] = shl <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[X1:%.*]] = xor <2 x i8> [[POW2X]], -; CHECK-NEXT: [[M:%.*]] = mul <2 x i8> [[X1]], -; CHECK-NEXT: ret <2 x i8> [[M]] +; CHECK-NEXT: [[MULSHL:%.*]] = shl <2 x i8> , [[X:%.*]] +; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[MULSHL]], +; CHECK-NEXT: ret <2 x i8> [[M1]] ; %pow2x = shl <2 x i8> , %x %x1 = xor <2 x i8> %pow2x, diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll 
index 38409394654f4..25cf241f44c07 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -1561,3 +1561,18 @@ define i32 @mul_common_bits(i32 %p) { %r = or i32 %m, %x ret i32 %r } + +define <4 x i1> @and_or_not_or_logical_vec(<4 x i32> %ap, <4 x i32> %bp) { +; CHECK-LABEL: @and_or_not_or_logical_vec( +; CHECK-NEXT: [[A:%.*]] = icmp ne <4 x i32> [[AP:%.*]], zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[A]] +; + %A = icmp eq <4 x i32> %ap, zeroinitializer + %B = icmp eq <4 x i32> %bp, zeroinitializer + %V = xor <4 x i1> %A, + %X = select <4 x i1> %B, <4 x i1> %V, <4 x i1> zeroinitializer + %W = or <4 x i1> %B, %A + %Y = xor <4 x i1> %W, + %Z = or <4 x i1> %X, %Y + ret <4 x i1> %Z +} diff --git a/llvm/test/Transforms/InstCombine/pow-to-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-to-sqrt.ll new file mode 100644 index 0000000000000..2805456c89e82 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pow-to-sqrt.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; This is a check to assure the attributes of `pow` do +; not get passed to sqrt. + +define void @pow_to_sqrt(double %x) { +; CHECK-LABEL: @pow_to_sqrt( +; CHECK-NEXT: [[SQRT:%.*]] = call afn double @sqrt(double [[X:%.*]]) +; CHECK-NEXT: ret void +; + %call = call afn double @pow(double %x, double 1.5) + ret void +} + +declare double @pow(double noundef, double noundef) diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll index dba59931235de..e5c313c361d59 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll @@ -685,6 +685,8 @@ define i1 @orn_and_cmp_1_logical(i37 %a, i37 %b, i1 %y) { ret i1 %or } +; TODO: This should fold the same way as the next test. 
+ define i1 @orn_and_cmp_1_partial_logical(i37 %a, i37 %b, i1 %y) { ; CHECK-LABEL: @orn_and_cmp_1_partial_logical( ; CHECK-NEXT: [[X:%.*]] = icmp sgt i37 [[A:%.*]], [[B:%.*]] @@ -693,6 +695,21 @@ define i1 @orn_and_cmp_1_partial_logical(i37 %a, i37 %b, i1 %y) { ; CHECK-NEXT: [[OR:%.*]] = select i1 [[X_INV]], i1 true, i1 [[AND]] ; CHECK-NEXT: ret i1 [[OR]] ; + %x = icmp sgt i37 %a, %b + %x_inv = icmp sle i37 %a, %b + %and = and i1 %x, %y + %or = select i1 %x_inv, i1 true, i1 %and + ret i1 %or +} + +define i1 @orn_and_cmp_1_partial_logical_commute(i37 %a, i37 %b) { +; CHECK-LABEL: @orn_and_cmp_1_partial_logical_commute( +; CHECK-NEXT: [[Y:%.*]] = call i1 @gen1() +; CHECK-NEXT: [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = select i1 [[X_INV]], i1 true, i1 [[Y]] +; CHECK-NEXT: ret i1 [[OR]] +; + %y = call i1 @gen1() ; thwart complexity-based canonicalization %x = icmp sgt i37 %a, %b %x_inv = icmp sle i37 %a, %b %and = and i1 %y, %x @@ -721,9 +738,58 @@ define i1 @orn_and_cmp_2_partial_logical(i16 %a, i16 %b, i1 %y) { ; CHECK-NEXT: [[OR:%.*]] = or i1 [[X_INV]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[OR]] ; + %x = icmp sge i16 %a, %b + %x_inv = icmp slt i16 %a, %b + %and = and i1 %x, %y + %or = select i1 %and, i1 true, i1 %x_inv + ret i1 %or +} + +define i1 @orn_and_cmp_2_partial_logical_commute(i16 %a, i16 %b) { +; CHECK-LABEL: @orn_and_cmp_2_partial_logical_commute( +; CHECK-NEXT: [[Y:%.*]] = call i1 @gen1() +; CHECK-NEXT: [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i1 [[Y]], [[X_INV]] +; CHECK-NEXT: ret i1 [[OR]] +; + %y = call i1 @gen1() ; thwart complexity-based canonicalization %x = icmp sge i16 %a, %b %x_inv = icmp slt i16 %a, %b %and = and i1 %y, %x %or = select i1 %and, i1 true, i1 %x_inv ret i1 %or } + +; PR58552 - this would crash trying to replace non-matching types + +define <2 x i1> @not_logical_and(i1 %b, <2 x i32> %a) { +; CHECK-LABEL: @not_logical_and( +; CHECK-NEXT: [[COND:%.*]] = icmp ult 
<2 x i32> [[A:%.*]], +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], +; CHECK-NEXT: [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[IMPLIED]], <2 x i1> , <2 x i1> [[AND]] +; CHECK-NEXT: ret <2 x i1> [[OR]] +; + %cond = icmp ult <2 x i32> %a, + %implied = icmp ugt <2 x i32> %a, + %and = select i1 %b, <2 x i1> %cond, <2 x i1> zeroinitializer + %or = select <2 x i1> %implied, <2 x i1> , <2 x i1> %and + ret <2 x i1> %or +} + +; This could reduce, but we do not match select-of-vectors with scalar condition as logical-and. + +define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) { +; CHECK-LABEL: @not_logical_and2( +; CHECK-NEXT: [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], +; CHECK-NEXT: [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> [[AND]], <2 x i1> , <2 x i1> [[IMPLIED]] +; CHECK-NEXT: ret <2 x i1> [[OR]] +; + %cond = icmp ult <2 x i32> %a, + %implied = icmp ugt <2 x i32> %a, + %and = select i1 %b, <2 x i1> %cond, <2 x i1> zeroinitializer + %or = select <2 x i1> %and, <2 x i1> , <2 x i1> %implied + ret <2 x i1> %or +} diff --git a/llvm/test/Transforms/InstCombine/shuffle-binop.ll b/llvm/test/Transforms/InstCombine/shuffle-binop.ll index 687efa0ac503e..5e3a6ff8250e4 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-binop.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-binop.ll @@ -50,13 +50,13 @@ define <4 x i8> @splat_binop_splat_x(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_splat_x( ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = add <4 x i8> [[XSPLAT]], [[Y:%.*]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] 
= add nsw <4 x i8> [[X]], [[Y:%.*]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %xsplat) - %b = add <4 x i8> %xsplat, %y + %b = add nsw <4 x i8> %xsplat, %y %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> %bsplat } @@ -65,14 +65,14 @@ define <4 x i8> @splat_binop_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: @splat_binop_splat_y( ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = sub <4 x i8> [[X:%.*]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i8> [[X:%.*]], [[Y]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %ysplat) %b = sub <4 x i8> %x, %ysplat - %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer + %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> ret <4 x i8> %bsplat } @@ -82,21 +82,40 @@ define <4 x i8> @splat_binop_splat_x_splat_y(<4 x i8> %x, <4 x i8> %y) { ; CHECK-NEXT: call void @use(<4 x i8> [[XSPLAT]]) ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x i8> [[Y:%.*]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: call void @use(<4 x i8> [[YSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = mul <4 x i8> [[XSPLAT]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x i8> [[B]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw <4 x i8> [[Y]], [[X]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 
x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: ret <4 x i8> [[BSPLAT]] ; %xsplat = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %xsplat) %ysplat = shufflevector <4 x i8> %y, <4 x i8> poison, <4 x i32> zeroinitializer call void @use(<4 x i8> %ysplat) - %b = mul <4 x i8> %xsplat, %ysplat + %b = mul nuw <4 x i8> %xsplat, %ysplat %bsplat = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer ret <4 x i8> %bsplat } -define @vscale_splat_binop_splat_x( %x, %y) { -; CHECK-LABEL: @vscale_splat_binop_splat_x( +define <4 x float> @splat_binop_splat_x_splat_y_fmath_flags(<4 x float> %x, <4 x float> %y) { +; CHECK-LABEL: @splat_binop_splat_x_splat_y_fmath_flags( +; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: call void @use(<4 x float> [[XSPLAT]]) +; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: call void @use(<4 x float> [[YSPLAT]]) +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <4 x float> [[Y]], [[X]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: ret <4 x float> [[BSPLAT]] +; + %xsplat = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> zeroinitializer + call void @use(<4 x float> %xsplat) + %ysplat = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> zeroinitializer + call void @use(<4 x float> %ysplat) + %b = fmul fast <4 x float> %xsplat, %ysplat + %bsplat = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer + ret <4 x float> %bsplat +} + +define @vscale_splat_udiv_splat_x( %x, %y) { +; CHECK-LABEL: @vscale_splat_udiv_splat_x( ; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer ; CHECK-NEXT: [[B:%.*]] = udiv [[XSPLAT]], [[Y:%.*]] ; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector 
[[B]], poison, zeroinitializer @@ -108,6 +127,19 @@ define @vscale_splat_binop_splat_x( %x, %bsplat } +define @vscale_splat_urem_splat_x( %x, %y) { +; CHECK-LABEL: @vscale_splat_urem_splat_x( +; CHECK-NEXT: [[XSPLAT:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer +; CHECK-NEXT: [[B:%.*]] = urem [[XSPLAT]], [[Y:%.*]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer +; CHECK-NEXT: ret [[BSPLAT]] +; + %xsplat = shufflevector %x, poison, zeroinitializer + %b = urem %xsplat, %y + %bsplat = shufflevector %b, poison, zeroinitializer + ret %bsplat +} + define @vscale_splat_binop_splat_y( %x, %y) { ; CHECK-LABEL: @vscale_splat_binop_splat_y( ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector [[Y:%.*]], poison, zeroinitializer @@ -140,8 +172,8 @@ define @vscale_splat_binop_splat_x_splat_y_calls( [[XSPLAT]]) ; CHECK-NEXT: [[YSPLAT:%.*]] = shufflevector [[Y:%.*]], poison, zeroinitializer ; CHECK-NEXT: call void @use_v( [[YSPLAT]]) -; CHECK-NEXT: [[B:%.*]] = lshr [[XSPLAT]], [[YSPLAT]] -; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[B]], poison, zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = lshr [[X]], [[Y]] +; CHECK-NEXT: [[BSPLAT:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer ; CHECK-NEXT: ret [[BSPLAT]] ; %xsplat = shufflevector %x, poison, zeroinitializer @@ -153,5 +185,22 @@ define @vscale_splat_binop_splat_x_splat_y_calls( %bsplat } +define <2 x double> @shuffle_op2_0th_element_mask(ptr %a, ptr %b) { + ;%0 = load <2 x double>, ptr @d, align 16 +; CHECK-LABEL: @shuffle_op2_0th_element_mask( +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[B:%.*]], align 16 +; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: ret <2 x double> [[SHUFFLE]] +; + %1 = load <2 x double>, ptr %a, align 16 + %2 = shufflevector <2 x 
double> %1, <2 x double> poison, <2 x i32> zeroinitializer + %3 = load <2 x double>, ptr %b, align 16 + %sub = fsub <2 x double> %3, %2 + %shuffle = shufflevector <2 x double> %sub, <2 x double> %sub, <2 x i32> + ret <2 x double> %shuffle +} + declare void @use(<4 x i8>) -declare void @use_v() \ No newline at end of file +declare void @use_v() diff --git a/llvm/test/Transforms/InstCombine/stpncpy-1.ll b/llvm/test/Transforms/InstCombine/stpncpy-1.ll index 6501ca4c6ff8b..9f9442705d126 100644 --- a/llvm/test/Transforms/InstCombine/stpncpy-1.ll +++ b/llvm/test/Transforms/InstCombine/stpncpy-1.ll @@ -448,6 +448,6 @@ define void @call_stpncpy_s(ptr %dst, ptr %src, i64 %n) { ret void } ;. -; ANY: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly } -; ANY: attributes #[[ATTR1:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn } +; ANY: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; ANY: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. 
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 4f5349c1b5c09..69fe99fe8521a 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -2229,16 +2229,14 @@ define i8 @demand_sub_from_variable_lowbits3(i8 %x, i8 %y) { ret i8 %r } -; TODO: ; C - ((C3 - X) & C2) --> (C - (C2 & C3)) + (X & C2) when: -; (C3 - (C2 & C3) + 1) is pow2 +; (C3 - ((C2 & C3) - 1)) is pow2 ; ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1) ; C2 is negative pow2 define i10 @sub_to_and_nuw(i10 %x) { ; CHECK-LABEL: @sub_to_and_nuw( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw i10 71, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i10 [[SUB]], 120 -; CHECK-NEXT: [[R:%.*]] = sub nuw nsw i10 443, [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = and i10 [[X:%.*]], 120 +; CHECK-NEXT: [[R:%.*]] = add nuw nsw i10 [[TMP1]], 379 ; CHECK-NEXT: ret i10 [[R]] ; %sub = sub nuw i10 71, %x @@ -2247,15 +2245,13 @@ define i10 @sub_to_and_nuw(i10 %x) { ret i10 %r } -; TODO: ; C - ((C3 -nuw X) & C2) --> (C - (C2 & C3)) + (X & C2) when: -; (C3 - (C2 & C3) + 1) is pow2 +; (C3 - ((C2 & C3) - 1)) is pow2 ; ((C2 + C3) & ((C2 & C3) - 1)) == ((C2 & C3) - 1) define i10 @sub_to_and_negpow2(i10 %x) { ; CHECK-LABEL: @sub_to_and_negpow2( -; CHECK-NEXT: [[SUB:%.*]] = sub i10 71, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i10 [[SUB]], -8 -; CHECK-NEXT: [[R:%.*]] = sub i10 33, [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = and i10 [[X:%.*]], -8 +; CHECK-NEXT: [[R:%.*]] = add i10 [[TMP1]], -31 ; CHECK-NEXT: ret i10 [[R]] ; %sub = sub i10 71, %x @@ -2342,9 +2338,8 @@ define i10 @sub_to_and_negative4(i10 %x) { define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector1( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> , [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], -; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> , [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i8> [[TMP1]], ; 
CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> , %x diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll index 765ae1a1b64a5..25f59d5570ee2 100644 --- a/llvm/test/Transforms/InstCombine/zext.ll +++ b/llvm/test/Transforms/InstCombine/zext.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instcombine -S | FileCheck %s +declare void @use1(i1) +declare void @use32(i32) +declare void @use_vec(<2 x i9>) + define i64 @test_sext_zext(i16 %A) { ; CHECK-LABEL: @test_sext_zext( ; CHECK-NEXT: [[C2:%.*]] = zext i16 [[A:%.*]] to i64 @@ -172,9 +176,6 @@ define i47 @sext_zext_apint2(i11 %A) { ret i47 %c2 } -declare void @use1(i1) -declare void @use32(i32) - define i32 @masked_bit_set(i32 %x, i32 %y) { ; CHECK-LABEL: @masked_bit_set( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]] @@ -535,3 +536,109 @@ join: %conv4 = zext i1 %x1 to i16 ret i16 %conv4 } + +; negative test - but this could be transformed to eliminate a use of 't' + +define i64 @and_trunc_extra_use1(i64 %x, i32 %y) { +; CHECK-LABEL: @and_trunc_extra_use1( +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: call void @use32(i32 [[T]]) +; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], [[Y:%.*]] +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %t = trunc i64 %x to i32 + call void @use32(i32 %t) + %a = and i32 %t, %y + %z = zext i32 %a to i64 + ret i64 %z +} + +; negative test - but this could be transformed to eliminate a use of 't' + +define i64 @and_trunc_extra_use1_commute(i64 %x, i32 %p) { +; CHECK-LABEL: @and_trunc_extra_use1_commute( +; CHECK-NEXT: [[Y:%.*]] = mul i32 [[P:%.*]], [[P]] +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: call void @use32(i32 [[T]]) +; CHECK-NEXT: [[A:%.*]] = and i32 [[Y]], [[T]] +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %y = mul i32 %p, %p ; thwart 
complexity-based canonicalization + %t = trunc i64 %x to i32 + call void @use32(i32 %t) + %a = and i32 %y, %t + %z = zext i32 %a to i64 + ret i64 %z +} + +; negative test - avoid creating an extra instruction + +define i64 @and_trunc_extra_use2(i64 %x, i32 %y) { +; CHECK-LABEL: @and_trunc_extra_use2( +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], [[Y:%.*]] +; CHECK-NEXT: call void @use32(i32 [[A]]) +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %t = trunc i64 %x to i32 + %a = and i32 %t, %y + call void @use32(i32 %a) + %z = zext i32 %a to i64 + ret i64 %z +} + +; With constant mask, we duplicate it as a wider constant. + +define i64 @and_trunc_extra_use2_constant(i64 %x) { +; CHECK-LABEL: @and_trunc_extra_use2_constant( +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[X:%.*]] to i32 +; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], 42 +; CHECK-NEXT: call void @use32(i32 [[A]]) +; CHECK-NEXT: [[Z:%.*]] = and i64 [[X]], 42 +; CHECK-NEXT: ret i64 [[Z]] +; + %t = trunc i64 %x to i32 + %a = and i32 %t, 42 + call void @use32(i32 %a) + %z = zext i32 %a to i64 + ret i64 %z +} + +; Works with arbitrary vectors and verify that the constant is zext. 
+ +define <2 x i17> @and_trunc_extra_use3_constant_vec(<2 x i17> %x) { +; CHECK-LABEL: @and_trunc_extra_use3_constant_vec( +; CHECK-NEXT: [[T:%.*]] = trunc <2 x i17> [[X:%.*]] to <2 x i9> +; CHECK-NEXT: call void @use_vec(<2 x i9> [[T]]) +; CHECK-NEXT: [[A:%.*]] = and <2 x i9> [[T]], +; CHECK-NEXT: call void @use_vec(<2 x i9> [[A]]) +; CHECK-NEXT: [[Z:%.*]] = and <2 x i17> [[X]], +; CHECK-NEXT: ret <2 x i17> [[Z]] +; + %t = trunc <2 x i17> %x to <2 x i9> + call void @use_vec(<2 x i9> %t) + %a = and <2 x i9> %t, + call void @use_vec(<2 x i9> %a) + %z = zext <2 x i9> %a to <2 x i17> + ret <2 x i17> %z +} + +; negative test - would require another cast + +define i64 @and_trunc_extra_use1_wider_src(i65 %x, i32 %y) { +; CHECK-LABEL: @and_trunc_extra_use1_wider_src( +; CHECK-NEXT: [[T:%.*]] = trunc i65 [[X:%.*]] to i32 +; CHECK-NEXT: call void @use32(i32 [[T]]) +; CHECK-NEXT: [[A:%.*]] = and i32 [[T]], [[Y:%.*]] +; CHECK-NEXT: [[Z:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: ret i64 [[Z]] +; + %t = trunc i65 %x to i32 + call void @use32(i32 %t) + %a = and i32 %t, %y + %z = zext i32 %a to i64 + ret i64 %z +} diff --git a/llvm/test/Transforms/InstSimplify/fdiv.ll b/llvm/test/Transforms/InstSimplify/fdiv.ll index 2b998b3e73234..38e31257e185a 100644 --- a/llvm/test/Transforms/InstSimplify/fdiv.ll +++ b/llvm/test/Transforms/InstSimplify/fdiv.ll @@ -59,3 +59,98 @@ define <2 x i1> @pr6096() { %fcmp = fcmp ole <2 x float> %fdiv, zeroinitializer ret <2 x i1> %fcmp } + + +; https://alive2.llvm.org/ce/z/JxX5in +define float @fdiv_nnan_ninf_by_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_f32( +; CHECK-NEXT: ret float poison +; + %fdiv = fdiv nnan ninf float %x, 0.0 + ret float %fdiv +} + +define float @fdiv_nnan_ninf_by_negzero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_negzero_f32( +; CHECK-NEXT: ret float poison +; + %fdiv = fdiv nnan ninf float %x, -0.0 + ret float %fdiv +} + +define float @fdiv_nnan_ninf_by_undef_f32(float %x) { +; CHECK-LABEL: 
@fdiv_nnan_ninf_by_undef_f32( +; CHECK-NEXT: ret float poison +; + %fdiv = fdiv nnan ninf float %x, undef + ret float %fdiv +} + +define float @fdiv_nnan_ninf_by_poison_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_poison_f32( +; CHECK-NEXT: ret float poison +; + %fdiv = fdiv nnan ninf float %x, poison + ret float %fdiv +} + +define <2 x float> @fdiv_nnan_ninf_by_zero_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_v2f32( +; CHECK-NEXT: ret <2 x float> poison +; + %fdiv = fdiv nnan ninf <2 x float> %x, zeroinitializer + ret <2 x float> %fdiv +} + +define <2 x float> @fdiv_nnan_ninf_by_undef_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_undef_v2f32( +; CHECK-NEXT: ret <2 x float> poison +; + %fdiv = fdiv nnan ninf <2 x float> %x, undef + ret <2 x float> %fdiv +} + +define <2 x float> @fdiv_nnan_ninf_by_zero_undef_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_undef_v2f32( +; CHECK-NEXT: ret <2 x float> poison +; + %fdiv = fdiv nnan ninf <2 x float> %x, + ret <2 x float> %fdiv +} + +; https://alive2.llvm.org/ce/z/wRV28p +define float @fdiv_nnan_nsz_ninf_by_zero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_zero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz float [[X:%.*]], 0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan nsz float %x, 0.0 + ret float %fdiv +} + +define float @fdiv_nnan_nsz_ninf_by_negzero_f32(float %x) { +; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_negzero_f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz float [[X:%.*]], -0.000000e+00 +; CHECK-NEXT: ret float [[FDIV]] +; + %fdiv = fdiv nnan nsz float %x, -0.0 + ret float %fdiv +} + +define <2 x float> @fdiv_nnan_nsz_ninf_by_zero_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_zero_v2f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan nsz <2 x float> %x, zeroinitializer + ret <2 x float> %fdiv +} + +define <2 x 
float> @fdiv_nnan_nsz_ninf_by_negzero_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_negzero_v2f32( +; CHECK-NEXT: [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], +; CHECK-NEXT: ret <2 x float> [[FDIV]] +; + %fdiv = fdiv nnan nsz <2 x float> %x, + ret <2 x float> %fdiv +} diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll index a279cab582d93..913b760dd331c 100644 --- a/llvm/test/Transforms/InstSimplify/or.ll +++ b/llvm/test/Transforms/InstSimplify/or.ll @@ -454,6 +454,48 @@ define <2 x i4> @and_or_not_or_commute7(<2 x i4> %A, <2 x i4> %B) { ret <2 x i4> %r } +; (~A & B) | ~(A | B) --> ~A with logical and +define i1 @and_or_not_or_logical(i1 %A, i1 %B) { +; CHECK-LABEL: @and_or_not_or_logical( +; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true +; CHECK-NEXT: ret i1 [[V]] +; + %V = xor i1 %A, true + %X = select i1 %V, i1 %B, i1 false + %W = or i1 %B, %A + %Y = xor i1 %W, true + %Z = or i1 %X, %Y + ret i1 %Z +} + +; (~B & A) | ~(A | B) --> ~A with logical and +define i1 @and_or_not_or_logical_rev(i1 %A, i1 %B) { +; CHECK-LABEL: @and_or_not_or_logical_rev( +; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true +; CHECK-NEXT: ret i1 [[V]] +; + %V = xor i1 %A, true + %X = select i1 %B, i1 %V, i1 false + %W = or i1 %B, %A + %Y = xor i1 %W, true + %Z = or i1 %X, %Y + ret i1 %Z +} + +; (~A & B) | ~(A | B) --> ~A with logical And and logical Or +define i1 @and_or_not_logical_or_logical_rev(i1 %A, i1 %B) { +; CHECK-LABEL: @and_or_not_logical_or_logical_rev( +; CHECK-NEXT: [[V:%.*]] = xor i1 [[A:%.*]], true +; CHECK-NEXT: ret i1 [[V]] +; + %V = xor i1 %A, true + %X = select i1 %B, i1 %V, i1 false + %W = select i1 %B, i1 true, i1 %A + %Y = xor i1 %W, true + %Z = or i1 %X, %Y + ret i1 %Z +} + ; negative test - It is not safe to propagate an undef element from the 'not' op. 
define <2 x i4> @and_or_not_or_commute7_undef_elt(<2 x i4> %A, <2 x i4> %B) { diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll index 972ed74988bd2..479be9d87a01a 100644 --- a/llvm/test/Transforms/LICM/scalar-promote.ll +++ b/llvm/test/Transforms/LICM/scalar-promote.ll @@ -600,7 +600,7 @@ Out: } define i8 @test_hoistable_existing_load_sinkable_store_writeonly(ptr dereferenceable(8) %ptr, i8 %start) writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_hoistable_existing_load_sinkable_store_writeonly( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i8, ptr [[PTR:%.*]], align 1 @@ -641,7 +641,7 @@ exit: ; Test case for PR51248. define void @test_sink_store_only() writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_only( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] @@ -678,7 +678,7 @@ exit: } define void @test_sink_store_to_local_object_only_loop_must_execute() writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_must_execute( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -719,7 +719,7 @@ exit: ; The store in the loop may not execute, so we need to introduce a load in the ; pre-header. Make sure the writeonly attribute is dropped. 
define void @test_sink_store_to_local_object_only_loop_may_not_execute(i8 %n) writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_to_local_object_only_loop_may_not_execute( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 @@ -761,7 +761,7 @@ exit: declare dereferenceable(8) noalias ptr @alloc_writeonly() writeonly define void @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1(i8 %n) writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_to_noalias_call_object_only_loop_may_not_execute1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = call noalias dereferenceable(8) ptr @alloc_writeonly() @@ -801,7 +801,7 @@ exit: } define void @test_sink_store_only_no_phi_needed() writeonly { -; CHECK: Function Attrs: writeonly +; CHECK: Function Attrs: memory(write) ; CHECK-LABEL: @test_sink_store_only_no_phi_needed( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/LICM/strlen.ll b/llvm/test/Transforms/LICM/strlen.ll index fc47660aaafb7..e4e72b253e05e 100644 --- a/llvm/test/Transforms/LICM/strlen.ll +++ b/llvm/test/Transforms/LICM/strlen.ll @@ -13,7 +13,7 @@ loop: } ; CHECK: declare i64 @strlen(ptr nocapture) #0 -; CHECK: attributes #0 = { argmemonly mustprogress nofree nounwind readonly willreturn } +; CHECK: attributes #0 = { mustprogress nofree nounwind willreturn memory(argmem: read) } declare i64 @strlen(ptr) diff --git a/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll b/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll new file mode 100644 index 0000000000000..5c40ee5b00e5c --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll @@ -0,0 +1,77 @@ +; RUN: opt < %s -loop-interchange -cache-line-size=4 -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \ +; RUN: -verify-dom-info 
-verify-loop-info -verify-loop-lcssa +; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s + +@b = external global [512 x [4 x i32]] +@c = global [2 x [4 x i32]] zeroinitializer, align 1 + +; Check that the outermost and the middle loops are not interchanged since +; the innermost loop has a reduction operation which is however not in a form +; that loop interchange can handle. Interchanging the outermost and the +; middle loops would intervene with the reduction and cause miscompile. + +; REMARKS: --- !Missed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: UnsupportedPHIInner +; REMARKS-NEXT: Function: test7 +; REMARKS: --- !Missed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: UnsupportedPHIInner +; REMARKS-NEXT: Function: test7 + +define i32 @test7() { +entry: + br label %for.cond1.preheader.i + +for.cond1.preheader.i: ; preds = %for.inc19.i, %entry + %i.011.i = phi i16 [ 0, %entry ], [ %inc20.i, %for.inc19.i ] + br label %for.cond4.preheader.i + +for.cond4.preheader.i: ; preds = %middle.block, %for.cond1.preheader.i + %j.010.i = phi i16 [ 0, %for.cond1.preheader.i ], [ %inc17.i, %middle.block ] + %arrayidx14.i = getelementptr inbounds [2 x [4 x i32]], ptr @c, i16 0, i16 %i.011.i, i16 %j.010.i + %arrayidx14.promoted.i = load i32, ptr %arrayidx14.i, align 1 + %0 = insertelement <4 x i32> , i32 %arrayidx14.promoted.i, i64 0 + br label %vector.body + +vector.body: ; preds = %vector.body, %for.cond4.preheader.i + %index = phi i16 [ 0, %for.cond4.preheader.i ], [ %index.next, %vector.body ] + %vec.phi = phi <4 x i32> [ %0, %for.cond4.preheader.i ], [ %16, %vector.body ] + %1 = or i16 %index, 1 + %2 = or i16 %index, 2 + %3 = or i16 %index, 3 + %4 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %index, i16 %j.010.i + %5 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %1, i16 %j.010.i + %6 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %2, i16 %j.010.i + %7 = getelementptr inbounds [512 x 
[4 x i32]], ptr @b, i16 0, i16 %3, i16 %j.010.i + %8 = load i32, ptr %4, align 1 + %9 = load i32, ptr %5, align 1 + %10 = load i32, ptr %6, align 1 + %11 = load i32, ptr %7, align 1 + %12 = insertelement <4 x i32> poison, i32 %8, i64 0 + %13 = insertelement <4 x i32> %12, i32 %9, i64 1 + %14 = insertelement <4 x i32> %13, i32 %10, i64 2 + %15 = insertelement <4 x i32> %14, i32 %11, i64 3 + %16 = add <4 x i32> %15, %vec.phi + %index.next = add nuw i16 %index, 4 + %17 = icmp eq i16 %index.next, 512 + br i1 %17, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %18 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %16) + store i32 %18, ptr %arrayidx14.i, align 1 + %inc17.i = add nuw nsw i16 %j.010.i, 1 + %exitcond12.not.i = icmp eq i16 %inc17.i, 4 + br i1 %exitcond12.not.i, label %for.inc19.i, label %for.cond4.preheader.i + +for.inc19.i: ; preds = %middle.block + %inc20.i = add nuw nsw i16 %i.011.i, 1 + %exitcond13.not.i = icmp eq i16 %inc20.i, 2 + br i1 %exitcond13.not.i, label %test.exit, label %for.cond1.preheader.i + +test.exit: ; preds = %for.inc19.i + %19 = load i32, ptr @c, align 1 + ret i32 %19 +} + +declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr56627.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr56627.ll new file mode 100644 index 0000000000000..40fbd3ed04cc0 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr56627.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -S -passes=loop-vectorize | FileCheck %s + +; Check that we can vectorize this loop without crashing. 
+ +target triple = "aarch64-none-linux-gnu" +define float @quux() { +; CHECK: @quux +bb: + br label %bb1 + +bb1: + %tmp = phi i64 [ %tmp3, %bb1 ], [ 0, %bb ] + %tmp2 = phi float [ %tmp5, %bb1 ], [ 0.000000e+00, %bb ] + %tmp3 = add nsw i64 %tmp, 1 + %tmp5 = fadd float %tmp2, 3.000000e+00 + %tmp6 = mul i32 0, 0 + %tmp7 = icmp sgt i64 %tmp, 0 + br i1 %tmp7, label %bb8, label %bb1 + +bb8: + %tmp9 = phi float [ %tmp5, %bb1 ] + ret float %tmp9 +} \ No newline at end of file diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll index 091b117c182ac..9ae930514e613 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll @@ -117,7 +117,7 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) { ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to * ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0nxv4f32(* [[TMP11]], i32 4, [[TMP8]], poison) -; CHECK-NEXT: [[TMP12:%.*]] = select fast [[TMP8]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float 0xFFF0000000000000, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP12:%.*]] = select fast [[TMP8]], [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, float 0x7FF0000000000000, i32 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP12]]) ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll new file mode 100644 index 
0000000000000..4ba2ae23daeaf --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=riscv64 -mattr=+v -loop-vectorize < %s | FileCheck %s + +; FIXME: Using a <4 x i32> would be strictly better than tail folded +; scalable vectorization in this case. +define void @small_trip_count(i32* nocapture %a) nounwind vscale_range(4,1024) { +; CHECK-LABEL: @small_trip_count( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 -5, [[TMP1]] +; CHECK-NEXT: br i1 [[TMP2]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP7]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP4]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP8]], i32 4) +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to * +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i32.p0nxv2i32(* [[TMP11]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = add nsw [[WIDE_MASKED_LOAD]], 
shufflevector ( insertelement ( poison, i32 1, i32 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP10]] to * +; CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0nxv2i32( [[TMP12]], * [[TMP13]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[IV]] +; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[GEP]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1 +; CHECK-NEXT: store i32 [[ADD]], i32* [[GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV]], 3 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %gep = getelementptr inbounds i32, i32* %a, i32 %iv + %v = load i32, i32* %gep, align 4 + %add = add nsw i32 %v, 1 + store i32 %add, i32* %gep, align 4 + %iv.next = add i32 %iv, 1 + %cond = icmp eq i32 %iv, 3 + br i1 %cond, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll new file mode 100644 index 0000000000000..e4dd8479cdc5d --- 
/dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+zve32f,+f -S 2>%t | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" +target triple = "riscv64" + +; We can't use scalable vectorization for Zvl32b due to RVVBitsPerBlock being +; 64. Since our vscale value is vlen/RVVBitsPerBlock this makes vscale 0. +; Make sure we fall back to fixed vectorization instead. +define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) { +; CHECK-LABEL: @vector_add_i16( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[V:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i16> poison, i16 [[V]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT3]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <2 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], <2 x i64> [[STEP_ADD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP0]], i32 2, <2 x i1> , <2 x i16> poison) +; CHECK-NEXT: 
[[WIDE_MASKED_GATHER2:%.*]] = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> [[TMP1]], i32 2, <2 x i1> , <2 x i16> poison) +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i16> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[WIDE_MASKED_GATHER2]], [[BROADCAST_SPLAT4]] +; CHECK-NEXT: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> [[TMP2]], <2 x ptr> [[TMP0]], i32 2, <2 x i1> ) +; CHECK-NEXT: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> [[TMP3]], <2 x ptr> [[TMP1]], i32 2, <2 x i1> ) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[ELEM:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ADD:%.*]] = add i16 [[ELEM]], [[V]] +; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %elem = load i16, ptr %arrayidx 
+ %add = add i16 %elem, %v + store i16 %add, ptr %arrayidx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index 8be98ebf087d5..ff3a071fc507a 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -168,7 +168,7 @@ define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> +; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> ; CHECK-NEXT: [[TMP27:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP26]]) ; CHECK-NEXT: [[TMP28]] = call fast float @llvm.minnum.f32(float [[TMP27]], float [[VEC_PHI]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll index 650afc820b729..e5dec5d06eca3 100644 --- a/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll +++ b/llvm/test/Transforms/LowerMatrixIntrinsics/strided-store-double.ll @@ -85,4 +85,4 @@ declare void @llvm.matrix.column.major.store.v10f64.i64(<10 x double>, double*, ; CHECK: declare void @llvm.matrix.column.major.store.v6f64.i64(<6 x double>, double* nocapture writeonly, i64, i1 immarg, i32 immarg, i32 immarg) #0 ; CHECK: declare void @llvm.matrix.column.major.store.v10f64.i64(<10 x double>, double* nocapture writeonly, i64, i1 immarg, i32 immarg, i32 
immarg) #0 -; CHECK: attributes #0 = { argmemonly nocallback nofree nosync nounwind willreturn writeonly } +; CHECK: attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } diff --git a/llvm/test/Transforms/MemCpyOpt/intel-lifetime-move.ll b/llvm/test/Transforms/MemCpyOpt/intel-lifetime-move.ll new file mode 100644 index 0000000000000..b27983fe48583 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/intel-lifetime-move.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes="memcpyopt" -S %s | FileCheck %s + +; memcpyopt lifetime skipping, is moving the lifetime.start before the def +; of %tm3. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.foo = type { ptr, ptr, i64, i64 } + +declare dso_local i32 @pluto(...) + +define dso_local void @wombat(ptr %meow) local_unnamed_addr { +; CHECK-LABEL: @wombat( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TM:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8 +; CHECK-NEXT: [[TM1:%.*]] = alloca [[STRUCT_FOO]], align 8 +; CHECK-NEXT: [[TM2:%.*]] = bitcast ptr [[TM1]] to ptr +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) [[TM2]], ptr noundef nonnull align 8 dereferenceable(32) [[MEOW:%.*]], i64 32, i1 false) +; CHECK-NEXT: [[TM3:%.*]] = bitcast ptr [[TM]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[TM3]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TM3]], ptr align 8 [[MEOW]], i64 32, i1 false) +; CHECK-NEXT: ret void +; +bb: + %tm = alloca %struct.foo, align 8 + %tm1 = alloca %struct.foo, align 8 + %tm2 = bitcast ptr %tm1 to ptr + call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %tm2, ptr noundef nonnull align 8 dereferenceable(32) %meow, i64 32, i1 false) + %tm3 = bitcast ptr %tm to ptr + call void 
@llvm.lifetime.start.p0(i64 32, ptr nonnull %tm3) + call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(32) %tm3, ptr noundef nonnull align 8 dereferenceable(32) %tm2, i64 32, i1 false) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2 + +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/Transforms/MemCpyOpt/lifetime.ll b/llvm/test/Transforms/MemCpyOpt/lifetime.ll index 77b94a7135d01..58e4ab8950553 100644 --- a/llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ b/llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -55,9 +55,8 @@ define i32 @call_slot_move_lifetime_start() { ; CHECK-LABEL: @call_slot_move_lifetime_start( ; CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DST:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @call(ptr [[TMP]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DST]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DST]], ptr align 4 [[TMP]], i64 4, i1 false) +; CHECK-NEXT: call void @call(ptr [[DST]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DST]]) ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[DST]], align 4 ; CHECK-NEXT: ret i32 [[V]] diff --git a/llvm/test/Transforms/ObjCARC/basic.ll b/llvm/test/Transforms/ObjCARC/basic.ll index 62ce2cffaf115..8178d48fa8c85 100644 --- a/llvm/test/Transforms/ObjCARC/basic.ll +++ b/llvm/test/Transforms/ObjCARC/basic.ll @@ -3073,5 +3073,5 @@ define void @test68(i8* %a, i8* %b) { !5 = !{i32 2, !"Debug Info Version", i32 3} ; CHECK: attributes [[NUW]] = { nounwind } 
-; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: ![[RELEASE]] = !{} diff --git a/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll index 03257d2e5429a..a31be00210286 100644 --- a/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll +++ b/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll @@ -105,7 +105,7 @@ declare void @NSLog(i8*, ...) declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { ssp uwtable } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #2 = { nonlazybind } ; CHECK: attributes [[NUW]] = { nounwind } ; CHECK: attributes #4 = { noinline ssp uwtable } diff --git a/llvm/test/Transforms/ObjCARC/nested.ll b/llvm/test/Transforms/ObjCARC/nested.ll index e1c46a0861d42..cc947717de46f 100644 --- a/llvm/test/Transforms/ObjCARC/nested.ll +++ b/llvm/test/Transforms/ObjCARC/nested.ll @@ -821,5 +821,5 @@ entry: ; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #1 = { argmemonly nocallback nofree nounwind willreturn writeonly } +; CHECK: attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } ; CHECK: attributes #2 = { nonlazybind } diff --git a/llvm/test/Transforms/ObjCARC/rle-s2l.ll b/llvm/test/Transforms/ObjCARC/rle-s2l.ll index e13ff35e91f28..d62147875a558 100644 --- a/llvm/test/Transforms/ObjCARC/rle-s2l.ll +++ b/llvm/test/Transforms/ObjCARC/rle-s2l.ll @@ -135,4 +135,4 @@ define void @test7(i8** %p, i8* %n, i8** %q, i8* %m) { } ; CHECK: attributes #0 = { nounwind } -; CHECK: attributes [[RO]] = { 
readonly } +; CHECK: attributes [[RO]] = { memory(read) } diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index 9d48cd1c7bca5..7ad863c38ef07 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -1211,67 +1211,67 @@ attributes #0 = { noinline cold } ; CHECK: ; Function Attrs: cold convergent noinline nounwind ; CHECK-NEXT: declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_dynamic(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_nested(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_max_active_levels(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind 
willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_parallel() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() -; OPTIMISTIC: ; Function Attrs: 
inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) ; OPTIMISTIC-NOT: Function Attrs @@ -1313,7 +1313,7 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare 
dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare dso_local double @omp_get_wtime() ; OPTIMISTIC-NOT: Function Attrs @@ -1340,7 +1340,7 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_team_num() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_cancellation() ; OPTIMISTIC-NOT: Function Attrs @@ -1370,25 +1370,25 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_device_num() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_proc_bind() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_places() ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_get_place_num_procs(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind 
willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_place_num() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_partition_num_places() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) ; OPTIMISTIC-NOT: Function Attrs @@ -1433,10 +1433,10 @@ attributes #0 = { noinline cold } ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: read) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) ; OPTIMISTIC: ; Function Attrs: nounwind @@ -1445,25 +1445,25 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t* nocapture nofree readonly, i32, i32) -; OPTIMISTIC: ; Function Attrs: 
inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t* nocapture nofree readonly, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t* nocapture nofree readonly, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: convergent nounwind @@ -1499,43 +1499,43 @@ attributes 
#0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind 
willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: 
inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind @@ -1556,52 +1556,52 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: convergent nounwind ; OPTIMISTIC-NEXT: declare 
void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree 
nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, 
inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t* nocapture nofree readonly, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t* nocapture nofree readonly, i32) ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn 
@@ -1619,7 +1619,7 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t* nocapture nofree readonly, i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t* nocapture nofree readonly, i32, i32, i32) ; OPTIMISTIC: ; Function Attrs: nounwind @@ -1673,7 +1673,7 @@ attributes #0 = { noinline cold } ; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount_mapper(%struct.ident_t*, i64, i64) ; OPTIMISTIC: ; Function Attrs: convergent nounwind diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index ec2f3e219d21d..108499631d196 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -72,7 +72,7 @@ define internal void @.omp_outlined.willreturn.0(i32* noalias %.global_tid., i32 ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readonly() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @readonly() ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.willreturn.0 @@ -194,9 +194,9 @@ entry: define internal void @.omp_outlined..0(i32* noalias 
%.global_tid., i32* noalias %.bound_tid.) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..0 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @readonly() #[[ATTR4]] +; CHECK-NEXT: call void @readonly() ; CHECK-NEXT: ret void ; ; CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..0 @@ -338,7 +338,7 @@ define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 ; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR14:[0-9]+]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR9:[0-9]+]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: @@ -466,7 +466,7 @@ define internal void @.omp_outlined..5(i32* noalias %.global_tid., i32* noalias ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 ; CHECK-SAME: (i32* noalias nocapture nofree readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR14]] +; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @[[GLOB0]]) #[[ATTR9]] ; CHECK-NEXT: [[TMP:%.*]] = load i32, 
i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 @@ -541,7 +541,7 @@ define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias ; CHECK-NEXT: [[A1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 ; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A1]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR0]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull align 4 [[TMP]]) #[[ATTR14:[0-9]+]] ; CHECK-NEXT: store i32 1, i32* [[A1]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i32** ; CHECK-NEXT: store i32* [[A1]], i32** [[TMP1]], align 8 diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll index 27f4e3e608ec9..3e0abd5deadbd 100644 --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -83,7 +83,7 @@ define internal void @bar() { ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR6:[0-9]+]], !dbg [[DBG8]] +; CHECK-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]] ; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]] ; CHECK-NEXT: ret void ; @@ -91,7 +91,7 @@ define internal void @bar() { ; CHECK-DISABLED-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-DISABLED-NEXT: entry: ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] -; CHECK-DISABLED-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR6:[0-9]+]], !dbg [[DBG8]] +; 
CHECK-DISABLED-NEXT: call void @share(i8* nofree [[TMP0]]) #[[ATTR1]], !dbg [[DBG8]] ; CHECK-DISABLED-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 4) #[[ATTR0]] ; CHECK-DISABLED-NEXT: ret void ; @@ -257,19 +257,17 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ;. ; CHECK: attributes #[[ATTR0]] = { nounwind } ; CHECK: attributes #[[ATTR1]] = { nosync nounwind } -; CHECK: attributes #[[ATTR2]] = { nounwind readnone } -; CHECK: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind writeonly } +; CHECK: attributes #[[ATTR2]] = { nounwind memory(none) } +; CHECK: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind memory(write) } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR6]] = { nosync nounwind writeonly } ;. ; CHECK-DISABLED: attributes #[[ATTR0]] = { nounwind } ; CHECK-DISABLED: attributes #[[ATTR1]] = { nosync nounwind } -; CHECK-DISABLED: attributes #[[ATTR2]] = { nounwind readnone } -; CHECK-DISABLED: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind writeonly } +; CHECK-DISABLED: attributes #[[ATTR2]] = { nounwind memory(none) } +; CHECK-DISABLED: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind memory(write) } ; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } ; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } -; CHECK-DISABLED: attributes #[[ATTR6]] = { nosync nounwind writeonly } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "remove_globalization.c", directory: "/tmp/remove_globalization.c") diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll index df2cd952cd278..9651c0e0ad471 100644 --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -150,8 +150,8 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) ; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @unknown_no_openmp() -; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR7:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[X]]) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR6]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; @@ -163,14 +163,14 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] ; CHECK: master1: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] ; 
CHECK-NEXT: br label [[NEXT:%.*]] ; CHECK: next: ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: [[B0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]] ; CHECK: master2: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR7]] +; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -185,15 +185,15 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: ; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG10:![0-9]+]] -; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR7]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR8]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: nofree norecurse nounwind writeonly +; CHECK: Function Attrs: nofree norecurse nounwind memory(write) ; CHECK-LABEL: define {{[^@]+}}@use.internalized ; CHECK-SAME: (i8* nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: @@ -208,6 +208,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: ret void ; ; +; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read) ; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared ; CHECK-SAME: (i64 [[TMP0:%.*]]) 
#[[ATTR2:[0-9]+]] { ; CHECK-NEXT: [[L:%.*]] = load i32, i32* @offset, align 4 @@ -216,14 +217,12 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; ;. ; CHECK: attributes #[[ATTR0]] = { "kernel" } -; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind writeonly } -; CHECK: attributes #[[ATTR2]] = { nosync nounwind readonly allocsize(0) } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nounwind memory(write) } +; CHECK: attributes #[[ATTR2]] = { nosync nounwind allocsize(0) memory(read) } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR6]] = { nounwind readonly } -; CHECK: attributes #[[ATTR7]] = { nounwind writeonly } -; CHECK: attributes #[[ATTR8]] = { nounwind } +; CHECK: attributes #[[ATTR6]] = { nounwind } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c") diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 0841600da2456..c707256b12c96 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -702,9 +702,9 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5) +; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8* +; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[X_H2S]] to i8* ; AMDGPU-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* ; AMDGPU-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -716,18 +716,18 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: -; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* 
bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP1]], i64 0) ; AMDGPU-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2 ; NVPTX-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 +; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* +; NVPTX-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X_H2S]] to i32* ; NVPTX-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; NVPTX-NEXT: br label [[FOR_COND:%.*]] ; NVPTX: for.cond: @@ -738,18 +738,18 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: -; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP2]], i64 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-NEXT: [[TMP1:%.*]] 
= bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP1]], i64 0) ; NVPTX-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 ; AMDGPU-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4, addrspace(5) +; AMDGPU-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[TMP0]] to i8* +; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[X_H2S]] to i8* ; AMDGPU-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[MALLOC_CAST]] to i32* ; AMDGPU-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] @@ -761,18 +761,18 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* 
[[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP1]], i64 0) ; AMDGPU-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__2 ; NVPTX-DISABLED-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 4 +; NVPTX-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[TMP0]] to i32* +; NVPTX-DISABLED-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X_H2S]] to i32* ; NVPTX-DISABLED-NEXT: call void @use(i32* nocapture [[X_ON_STACK]]) #[[ATTR7]] ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: @@ -783,9 +783,9 @@ define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP2]], i64 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] +; NVPTX-DISABLED-NEXT: 
[[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* @__omp_outlined__3_wrapper.ID, i8** [[TMP1]], i64 0) ; NVPTX-DISABLED-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 ; NVPTX-DISABLED-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] ; @@ -2426,7 +2426,7 @@ attributes #11 = { convergent } ; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } ; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; AMDGPU: attributes #[[ATTR8]] = { convergent } -; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } ; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } ;. @@ -2439,7 +2439,7 @@ attributes #11 = { convergent } ; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } ; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; NVPTX: attributes #[[ATTR8]] = { convergent } -; NVPTX: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } ; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } ;. 
@@ -2452,7 +2452,7 @@ attributes #11 = { convergent } ; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } ; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent } -; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } ; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } ;. @@ -2465,7 +2465,7 @@ attributes #11 = { convergent } ; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } ; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } ; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent } -; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } ; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll index 6c7fcd30f3de6..3d58485871918 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -146,7 +146,7 @@ attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } !11 = !{!"Simple C/C++ TBAA"} ;. 
; CHECK: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -; CHECK: attributes #[[ATTR1]] = { alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +; CHECK: attributes #[[ATTR1]] = { alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } ; CHECK: attributes #[[ATTR2]] = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } ; CHECK: attributes #[[ATTR3]] = { nounwind } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { alwaysinline } diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index fa5a2ee8c7f73..411d73a066c02 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -396,11 +396,11 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { alwaysinline } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind readonly willreturn "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind willreturn memory(read) "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } ; CHECK: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } ; CHECK: attributes #[[ATTR6]] = { nounwind } -; CHECK: attributes #[[ATTR7:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CHECK: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind } ; CHECK: attributes #[[ATTR9]] = { nounwind willreturn } ; CHECK: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" } @@ -408,11 +408,11 @@ attributes #5 = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_ame ; CHECK-DISABLED: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLED: attributes #[[ATTR1:[0-9]+]] = { alwaysinline } ; CHECK-DISABLED: attributes #[[ATTR2:[0-9]+]] = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind readonly willreturn "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind 
willreturn memory(read) "frame-pointer"="all" "llvm.assume"="ompx_spmd_amenable" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLED: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind allocsize(0) } ; CHECK-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } ; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind } -; CHECK-DISABLED: attributes #[[ATTR7:[0-9]+]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CHECK-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; CHECK-DISABLED: attributes #[[ATTR8:[0-9]+]] = { convergent nounwind } ; CHECK-DISABLED: attributes #[[ATTR9]] = { nounwind willreturn } ; CHECK-DISABLED: attributes #[[ATTR10]] = { convergent nounwind "llvm.assume"="omp_no_openmp,ompx_spmd_amenable" } diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll index 5896d4166e2cb..e49de449e5113 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -155,7 +155,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: -; CHECK-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @generic_helper() #[[ATTR5]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -168,7 +168,7 @@ define weak void @__omp_offloading_2b_10393b5_generic_l20() #0 { ; CHECK-DISABLE-SPMDIZATION-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-DISABLE-SPMDIZATION-NEXT: 
br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK-DISABLE-SPMDIZATION: user_code.entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR6:[0-9]+]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @generic_helper() #[[ATTR5]] ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; CHECK-DISABLE-SPMDIZATION: worker.exit: @@ -194,7 +194,7 @@ define internal void @spmd_helper() #1 { ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-NEXT: call void @leaf() #[[ATTR5]] ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) @@ -204,7 +204,7 @@ define internal void @spmd_helper() #1 { ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]] ; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) #[[ATTR2:[0-9]+]] ; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** ; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), 
i8* @__omp_outlined___wrapper.ID, i8** [[TMP1]], i64 0) @@ -226,7 +226,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__ @@ -234,7 +234,7 @@ define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: ; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 ; CHECK-DISABLE-SPMDIZATION-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR7:[0-9]+]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @unknown() #[[ATTR6:[0-9]+]] ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: @@ -328,13 +328,13 @@ define internal void @generic_helper() #1 { ; CHECK-LABEL: define {{[^@]+}}@generic_helper ; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-NEXT: call void @leaf() #[[ATTR5]] ; CHECK-NEXT: ret void ; ; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@generic_helper ; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR4]] { ; CHECK-DISABLE-SPMDIZATION-NEXT: entry: -; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR6]] +; CHECK-DISABLE-SPMDIZATION-NEXT: call void @leaf() #[[ATTR5]] ; CHECK-DISABLE-SPMDIZATION-NEXT: ret void ; entry: @@ -371,19 +371,17 @@ attributes #5 = { convergent } ; CHECK: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR2]] = { nounwind } ; CHECK: attributes 
#[[ATTR3:[0-9]+]] = { alwaysinline } -; CHECK: attributes #[[ATTR4]] = { convergent noinline nounwind writeonly "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR4]] = { convergent noinline nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR5]] = { convergent nounwind } -; CHECK: attributes #[[ATTR6]] = { convergent nounwind writeonly } -; CHECK: attributes #[[ATTR7]] = { convergent } +; CHECK: attributes #[[ATTR6]] = { convergent } ;. ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { nounwind } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nounwind writeonly "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5]] = { 
convergent nounwind } -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent nounwind writeonly } -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR7]] = { convergent } +; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent } ;. ; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0} ; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1} diff --git a/llvm/test/Transforms/PGOProfile/consecutive-zeros.ll b/llvm/test/Transforms/PGOProfile/consecutive-zeros.ll index a388cbc37d5a6..6634838d21e1a 100644 --- a/llvm/test/Transforms/PGOProfile/consecutive-zeros.ll +++ b/llvm/test/Transforms/PGOProfile/consecutive-zeros.ll @@ -1,6 +1,5 @@ -; REQUIRES: asserts ; RUN: llvm-profdata merge %S/Inputs/consecutive-zeros.proftext -o %t.profdata -; RUN: opt < %s -debug -passes=pgo-instr-use,pgo-memop-opt -pgo-memop-count-threshold=0 -pgo-memop-percent-threshold=0 -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=pgo-instr-use,pgo-memop-opt -pgo-memop-count-threshold=0 -pgo-memop-percent-threshold=0 -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/PGOProfile/critical-edge-threshold.ll b/llvm/test/Transforms/PGOProfile/critical-edge-threshold.ll new file mode 100644 index 0000000000000..d70fbaa791057 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/critical-edge-threshold.ll @@ -0,0 +1,27 @@ +; Test the critical edge threshold +; RUN: opt < %s -passes=pgo-instr-gen -pgo-critical-edge-threshold=1 -pgo-instrument-entry=true -S | FileCheck %s + +@sum = dso_local global i32 0, align 4 + +define void @foo(i32 %a, i32 %b) { +entry: + %tobool.not = icmp eq i32 %a, 0 + br i1 %tobool.not, label %if.end4, label %if.then + +if.then: + %0 = load i32, ptr @sum, align 4 + %inc = add nsw i32 %0, 1 + store i32
%inc, ptr @sum, align 4 + %tobool1.not = icmp eq i32 %b, 0 + br i1 %tobool1.not, label %if.end4, label %if.then2 + +if.then2: + %inc3 = add nsw i32 %0, 2 + store i32 %inc3, ptr @sum, align 4 + br label %if.end4 + +if.end4: + ret void +} + +; CHECK-NOT: call void @llvm.instrprof.increment(ptr @__profn_foo diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/X86/intrinsic-attributes.ll b/llvm/test/Transforms/RewriteStatepointsForGC/X86/intrinsic-attributes.ll index b0d6531f05342..ce8d977b8d4a5 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/X86/intrinsic-attributes.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/X86/intrinsic-attributes.ll @@ -1,6 +1,6 @@ ; RUN: opt < %s -S -rewrite-statepoints-for-gc | FileCheck %s -; CHECK: Function Attrs: nocallback nofree nosync nounwind readnone willreturn +; CHECK: Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) ; CHECK: declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/statepoint-attrs.ll b/llvm/test/Transforms/RewriteStatepointsForGC/statepoint-attrs.ll index 1580b09f25882..1eb6cefc4dcd8 100644 --- a/llvm/test/Transforms/RewriteStatepointsForGC/statepoint-attrs.ll +++ b/llvm/test/Transforms/RewriteStatepointsForGC/statepoint-attrs.ll @@ -20,6 +20,6 @@ define void @test1(i8 addrspace(1)* %arg) gc "statepoint-example" { attributes #1 = { norecurse noimplicitfloat } ;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone } +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind memory(none) } ; CHECK: attributes #[[ATTR1]] = { noimplicitfloat norecurse } ;. 
diff --git a/llvm/test/Transforms/SCCP/conditions-ranges.ll b/llvm/test/Transforms/SCCP/conditions-ranges.ll index 3d4b529dbc79e..e108d63e9091f 100644 --- a/llvm/test/Transforms/SCCP/conditions-ranges.ll +++ b/llvm/test/Transforms/SCCP/conditions-ranges.ll @@ -1369,3 +1369,12 @@ bb139: ; preds = %bb135 bb142: ; preds = %bb139 ret void } + +define i1 @ptr_icmp_data_layout() { +; CHECK-LABEL: @ptr_icmp_data_layout( +; CHECK-NEXT: ret i1 false +; + %a.end = getelementptr i32, ptr @A, i64 1 + %cmp = icmp eq ptr %a.end, @A + ret i1 %cmp +} diff --git a/llvm/test/Transforms/SCCP/ipscp-drop-argmemonly.ll b/llvm/test/Transforms/SCCP/ipscp-drop-argmemonly.ll index 015c9afb3720e..22726e0cac1f1 100644 --- a/llvm/test/Transforms/SCCP/ipscp-drop-argmemonly.ll +++ b/llvm/test/Transforms/SCCP/ipscp-drop-argmemonly.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals ; RUN: opt -passes=ipsccp -S %s | FileCheck %s ; Test cases to ensure argmemonly/inaccessiblemem_or_argmemonly attributes are @@ -10,8 +10,11 @@ ; Here the pointer argument %arg will be replaced by a constant. We need to ; drop argmemonly. +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 0 +;. define internal void @ptrarg.1(ptr %arg, i32 %val) argmemonly nounwind { -; CHECK: Function Attrs: nounwind +; CHECK: Function Attrs: nounwind memory(readwrite, inaccessiblemem: none) ; CHECK-LABEL: @ptrarg.1( ; CHECK-NEXT: store i32 10, ptr @g, align 4 ; CHECK-NEXT: ret void @@ -37,7 +40,7 @@ define i32 @caller.1(i32 %n) { ; Here only the non-pointer argument %val is replaced, no need ; to drop the argmemonly attribute. 
define internal void @ptrarg.2(ptr %arg, i32 %val) argmemonly nounwind { -; CHECK: Function Attrs: argmemonly nounwind +; CHECK: Function Attrs: nounwind memory(argmem: readwrite) ; CHECK-LABEL: @ptrarg.2( ; CHECK-NEXT: store i32 10, ptr [[ARG:%.*]], align 4 ; CHECK-NEXT: ret void @@ -59,7 +62,7 @@ define void @caller.2(ptr %ptr) { ; Here the pointer argument %arg will be replaced by a constant. We need to ; drop inaccessiblemem_or_argmemonly. define internal void @ptrarg.3(ptr %arg, i32 %val) inaccessiblemem_or_argmemonly nounwind { -; CHECK: Function Attrs: nounwind +; CHECK: Function Attrs: nounwind memory(readwrite) ; CHECK-LABEL: @ptrarg.3( ; CHECK-NEXT: store i32 10, ptr @g, align 4 ; CHECK-NEXT: ret void @@ -85,7 +88,7 @@ define i32 @caller.3(i32 %n) { ; Here only the non-pointer argument %val is replaced, no need ; to drop the inaccessiblemem_or_argmemonly attribute. define internal void @ptrarg.4(ptr %arg, i32 %val) inaccessiblemem_or_argmemonly nounwind { -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly nounwind +; CHECK: Function Attrs: nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) ; CHECK-LABEL: @ptrarg.4( ; CHECK-NEXT: store i32 10, ptr [[ARG:%.*]], align 4 ; CHECK-NEXT: ret void @@ -107,7 +110,7 @@ define void @caller.4(ptr %ptr) { ; Here the pointer argument %arg will be replaced by a constant. We need to ; drop inaccessiblemem_or_argmemonly. 
define internal void @ptrarg.5(ptr %arg, i32 %val) argmemonly inaccessiblemem_or_argmemonly nounwind { -; CHECK: Function Attrs: nounwind +; CHECK: Function Attrs: nounwind memory(readwrite, inaccessiblemem: none) ; CHECK-LABEL: @ptrarg.5( ; CHECK-NEXT: store i32 10, ptr @g, align 4 ; CHECK-NEXT: ret void @@ -143,10 +146,10 @@ define internal void @ptrarg.6.cs.attributes(ptr %arg, i32 %val) { define i32 @caller.6.cs.attributes(i32 %n) { ; CHECK-LABEL: @caller.6.cs.attributes( ; CHECK-NEXT: store i32 1, ptr @g, align 4 -; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) [[NOUNWIND:#[0-9]+]] -; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) [[NOUNWIND:#[0-9]+]] -; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) [[NOUNWIND:#[0-9]+]] -; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) [[NOUNWIND:#[0-9]+]] +; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) #[[ATTR0]] +; CHECK-NEXT: tail call void @ptrarg.5(ptr @g, i32 10) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: [[G_VAL:%.*]] = load i32, ptr @g, align 4 ; CHECK-NEXT: ret i32 [[G_VAL]] ; @@ -159,4 +162,10 @@ define i32 @caller.6.cs.attributes(i32 %n) { ret i32 %g.val } -; CHECK: [[NOUNWIND]] = { nounwind } +;. +; CHECK: attributes #[[ATTR0]] = { nounwind memory(readwrite, inaccessiblemem: none) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR2]] = { nounwind memory(readwrite) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) } +; CHECK: attributes #[[ATTR4]] = { nounwind } +;. 
diff --git a/llvm/test/Transforms/SCCP/remove-call-inst.ll b/llvm/test/Transforms/SCCP/remove-call-inst.ll index b4ab128bffa28..6881b82a938fb 100644 --- a/llvm/test/Transforms/SCCP/remove-call-inst.ll +++ b/llvm/test/Transforms/SCCP/remove-call-inst.ll @@ -36,4 +36,4 @@ return: } ; CHECK: attributes #0 = { noreturn nounwind } -; CHECK: attributes #1 = { nounwind readnone willreturn } +; CHECK: attributes #1 = { nounwind willreturn memory(none) } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/phi-use-order-scalable.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/phi-use-order-scalable.ll new file mode 100644 index 0000000000000..8e30d1865bf61 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/phi-use-order-scalable.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=slp-vectorizer -S < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-arm-none-eabi" + +define @scalable(i1 %c, i32 %srcALen, i32 %srcBLen) { +; CHECK-LABEL: @scalable( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END12:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br label [[IF_END12]] +; CHECK: if.end12: +; CHECK-NEXT: [[SRCALEN_ADDR_0:%.*]] = phi i32 [ [[SRCALEN:%.*]], [[IF_THEN]] ], [ [[SRCBLEN:%.*]], [[IF_ELSE]] ] +; CHECK-NEXT: [[SRCBLEN_ADDR_0:%.*]] = phi i32 [ [[SRCBLEN]], [[IF_THEN]] ], [ [[SRCALEN]], [[IF_ELSE]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT78:%.*]] = insertelement poison, i32 [[SRCBLEN_ADDR_0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT82:%.*]] = insertelement poison, i32 [[SRCALEN_ADDR_0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT83:%.*]] = shufflevector [[BROADCAST_SPLATINSERT82]], poison, zeroinitializer +; CHECK-NEXT: ret [[BROADCAST_SPLAT83]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + br 
label %if.end12 + +if.else: ; preds = %entry + br label %if.end12 + +if.end12: ; preds = %if.else, %if.then + %srcALen.addr.0 = phi i32 [ %srcALen, %if.then ], [ %srcBLen, %if.else ] + %srcBLen.addr.0 = phi i32 [ %srcBLen, %if.then ], [ %srcALen, %if.else ] + %broadcast.splatinsert78 = insertelement poison, i32 %srcBLen.addr.0, i64 0 + %broadcast.splatinsert82 = insertelement poison, i32 %srcALen.addr.0, i64 0 + %broadcast.splat83 = shufflevector %broadcast.splatinsert82, poison, zeroinitializer + ret %broadcast.splat83 +} + +define @multiuse(i1 %c, i32 %srcALen, i32 %srcBLen) { +; CHECK-LABEL: @multiuse( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END12:%.*]] +; CHECK: if.else: +; CHECK-NEXT: br label [[IF_END12]] +; CHECK: if.end12: +; CHECK-NEXT: [[SRCALEN_ADDR_0:%.*]] = phi i32 [ [[SRCALEN:%.*]], [[IF_THEN]] ], [ [[SRCBLEN:%.*]], [[IF_ELSE]] ] +; CHECK-NEXT: [[SRCBLEN_ADDR_0:%.*]] = phi i32 [ [[SRCBLEN]], [[IF_THEN]] ], [ [[SRCALEN]], [[IF_ELSE]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT78:%.*]] = insertelement poison, i32 [[SRCBLEN_ADDR_0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT82:%.*]] = insertelement poison, i32 [[SRCALEN_ADDR_0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT83:%.*]] = shufflevector [[BROADCAST_SPLATINSERT82]], poison, zeroinitializer +; CHECK-NEXT: [[X:%.*]] = add i32 [[SRCALEN_ADDR_0]], [[SRCBLEN_ADDR_0]] +; CHECK-NEXT: [[BROADCAST_SPLAT84:%.*]] = insertelement [[BROADCAST_SPLAT83]], i32 [[SRCBLEN_ADDR_0]], i64 1 +; CHECK-NEXT: ret [[BROADCAST_SPLAT84]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %if.end12 + +if.else: ; preds = %entry + br label %if.end12 + +if.end12: ; preds = %if.else, %if.then + %srcALen.addr.0 = phi i32 [ %srcALen, %if.then ], [ %srcBLen, %if.else ] + %srcBLen.addr.0 = phi i32 [ %srcBLen, %if.then ], [ %srcALen, %if.else ] + %broadcast.splatinsert78 = insertelement 
poison, i32 %srcBLen.addr.0, i64 0 + %broadcast.splatinsert82 = insertelement poison, i32 %srcALen.addr.0, i64 0 + %broadcast.splat83 = shufflevector %broadcast.splatinsert82, poison, zeroinitializer + %x = add i32 %srcALen.addr.0, %srcBLen.addr.0 + %broadcast.splat84 = insertelement %broadcast.splat83, i32 %srcBLen.addr.0, i64 1 + ret %broadcast.splat84 +} + diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll index f870fb3a9bc1f..5dff4be7493f9 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -63,8 +63,8 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1 ; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2 ; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1 ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] @@ -73,15 +73,15 @@ define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1 ; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2 ; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> 
[[TMP4]], half [[B1]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ] ; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ] -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> ; CHECK-NEXT: ret <4 x half> [[TMP12]] diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll index f39b03c01dc6a..cb868d440b69d 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll @@ -64,7 +64,7 @@ entry: ret void } -; CHECK-IL: Function Attrs: inaccessiblememonly nocallback nofree nosync nounwind willreturn +; CHECK-IL: Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) ; CHECK-IL-NEXT: declare void @llvm.pseudoprobe(i64, i64, i32, i64) ; CHECK-IL: ![[#FOO:]] = distinct !DISubprogram(name: "foo" diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/invalidate-block-and-loop-dispositions.ll b/llvm/test/Transforms/SimpleLoopUnswitch/invalidate-block-and-loop-dispositions.ll index c7ce403f808eb..fcef88667449f 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/invalidate-block-and-loop-dispositions.ll +++ 
b/llvm/test/Transforms/SimpleLoopUnswitch/invalidate-block-and-loop-dispositions.ll @@ -110,3 +110,31 @@ inner: } declare i16 @bar() + +define void @pr58751(i16 %a, ptr %dst) { +entry: + %c.1 = icmp eq i16 %a, 0 + br label %outer.header + +outer.header: + %outer.iv = phi i16 [ %a, %entry ], [ %outer.iv.next, %outer.latch ] + br label %inner.header + +inner.header: + %inner.iv = phi i16 [ %outer.iv, %outer.header ], [ %inner.iv.next, %inner.latch ] + br i1 %c.1, label %outer.latch, label %inner.latch + +inner.latch: + %inner.iv.next = add nsw i16 %inner.iv, 1 + store i16 %inner.iv.next, ptr %dst, align 1 + %c.2 = icmp eq i16 %inner.iv.next, 0 + br i1 %c.2, label %exit, label %inner.header + +outer.latch: + %outer.iv.next = add nsw i16 %outer.iv, 1 + br label %outer.header + +exit: + ret void +} + diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll new file mode 100644 index 0000000000000..91dd181b0a98a --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-skip-selects-in-guards.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='loop(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s +; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -verify-memoryssa -S < %s | FileCheck %s + +declare ptr @pluto() +declare void @llvm.experimental.guard(i1, ...) 
+declare void @widget() + +; REQUIRES: asserts + +define void @foo(ptr addrspace(1) %arg, i64 %arg1) personality ptr @pluto { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = icmp slt i32 poison, 570 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP]], i1 true, i1 false +; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT_US:%.*]], label [[BB_SPLIT:%.*]] +; CHECK: bb.split.us: +; CHECK-NEXT: br label [[BB3_US:%.*]] +; CHECK: bb3.us: +; CHECK-NEXT: br label [[GUARDED_US:%.*]] +; CHECK: bb4.us: +; CHECK-NEXT: invoke void @widget() +; CHECK-NEXT: to label [[BB6_US:%.*]] unwind label [[BB7_SPLIT_US:%.*]] +; CHECK: bb6.us: +; CHECK-NEXT: invoke void @widget() +; CHECK-NEXT: to label [[BB3_US]] unwind label [[BB7_SPLIT_US]] +; CHECK: guarded.us: +; CHECK-NEXT: invoke void @widget() +; CHECK-NEXT: to label [[BB4_US:%.*]] unwind label [[BB7_SPLIT_US]] +; CHECK: bb7.split.us: +; CHECK-NEXT: [[TMP8_US:%.*]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label [[BB7:%.*]] +; CHECK: bb.split: +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br label [[DEOPT:%.*]] +; CHECK: deopt: +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false, i32 7) [ "deopt"() ] +; CHECK-NEXT: unreachable +; CHECK: bb7: +; CHECK-NEXT: ret void +; +bb: + %tmp = icmp slt i32 poison, 570 + %tmp2 = select i1 %tmp, i1 true, i1 false + br label %bb3 + +bb3: ; preds = %bb6, %bb + call void (i1, ...) 
@llvm.experimental.guard(i1 %tmp2, i32 7) [ "deopt"() ] + invoke void @widget() + to label %bb4 unwind label %bb7 + +bb4: ; preds = %bb3 + invoke void @widget() + to label %bb6 unwind label %bb7 + +bb6: ; preds = %bb4 + invoke void @widget() + to label %bb3 unwind label %bb7 + +bb7: ; preds = %bb6, %bb4, %bb3 + %tmp8 = landingpad { ptr, i32 } + cleanup + ret void +} + diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll index b1802ba8022ba..bfbba255f3c6f 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll @@ -68,7 +68,7 @@ declare void @destructor() declare dso_local i32 @__gxx_personality_v0(...) ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "", directory: "/") diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll index 20ecd03476c63..7b0359aa1d71a 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll @@ -2475,5 +2475,5 @@ declare dso_local i32 @__gxx_personality_v0(...) ;. 
; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } ; CHECK: attributes #[[ATTR1]] = { nomerge } -; CHECK: attributes #[[ATTR2]] = { readnone } +; CHECK: attributes #[[ATTR2]] = { memory(none) } ;. diff --git a/llvm/test/Transforms/SimplifyCFG/assume.ll b/llvm/test/Transforms/SimplifyCFG/assume.ll index ef3e5376f3950..cd41a8040d8b7 100644 --- a/llvm/test/Transforms/SimplifyCFG/assume.ll +++ b/llvm/test/Transforms/SimplifyCFG/assume.ll @@ -1,21 +1,133 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s -define void @test1() { - call void @llvm.assume(i1 0) - ret void +define void @assume_false_to_unreachable1() { +; CHECK-LABEL: @assume_false_to_unreachable1( +; CHECK-NEXT: unreachable +; + call void @llvm.assume(i1 0) + ret void -; CHECK-LABEL: @test1 -; CHECK-NOT: llvm.assume -; CHECK: unreachable } -define void @test2() { - call void @llvm.assume(i1 undef) - ret void +define void @assume_undef_to_unreachable() { +; CHECK-LABEL: @assume_undef_to_unreachable( +; CHECK-NEXT: unreachable +; + call void @llvm.assume(i1 undef) + ret void -; CHECK-LABEL: @test2 -; CHECK-NOT: llvm.assume -; CHECK: unreachable +} + +define i32 @speculate_block_with_assume_basic(i1 %c, i32 %x) { +; CHECK-LABEL: @speculate_block_with_assume_basic( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 1, i32 0 +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] +; +entry: + br i1 %c, label %if, label %join + +if: + %cmp = icmp ne i32 %x, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ 1, %if ] + ret i32 %phi +} + +define i32 @speculate_block_with_assume_extra_instr(i1 %c, i32 %x) { +; CHECK-LABEL: @speculate_block_with_assume_extra_instr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 [[ADD]], i32 0 +; CHECK-NEXT: ret i32 
[[SPEC_SELECT]] +; +entry: + br i1 %c, label %if, label %join + +if: + %add = add i32 %x, 1 + %cmp = icmp ne i32 %add, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ %add, %if ] + ret i32 %phi +} + +; We only allow speculating one instruction. Here %add and %add2 are used by +; the assume, but not ephemeral, because they are also used by %phi. +define i32 @speculate_block_with_assume_extra_instrs_too_many(i1 %c, i32 %x) { +; CHECK-LABEL: @speculate_block_with_assume_extra_instrs_too_many( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[ADD2]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD2]], [[IF]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c, label %if, label %join + +if: + %add = add i32 %x, 1 + %add2 = add i32 %add, 1 + %cmp = icmp ne i32 %add2, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ %add2, %if ] + ret i32 %phi +} + +define i32 @speculate_block_with_assume_extra_instrs_okay(i1 %c, i32 %x) { +; CHECK-LABEL: @speculate_block_with_assume_extra_instrs_okay( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 [[ADD]], i32 0 +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] +; +entry: + br i1 %c, label %if, label %join + +if: + %add = add i32 %x, 1 + %add2 = add i32 %add, 1 + %cmp = icmp ne i32 %add2, 0 + call void @llvm.assume(i1 %cmp) + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ %add, %if ] + ret i32 %phi +} + +define i32 @speculate_block_with_assume_operand_bundle(i1 %c, ptr %p) { +; CHECK-LABEL: @speculate_block_with_assume_operand_bundle( +; 
CHECK-NEXT: entry: +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 1, i32 0 +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] +; +entry: + br i1 %c, label %if, label %join + +if: + call void @llvm.assume(i1 true) ["nonnull"(ptr %p)] + br label %join + +join: + %phi = phi i32 [ 0, %entry ], [ 1, %if ] + ret i32 %phi } declare void @llvm.assume(i1) nounwind diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll index fc200e041125e..7965c8d23cd00 100644 --- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll +++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll @@ -1133,7 +1133,7 @@ exit: ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind uwtable } ; CHECK: attributes #[[ATTR1]] = { nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { noredzone nounwind readnone ssp } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { noredzone nounwind ssp memory(none) } ;. ; CHECK: [[PROF0]] = !{!"branch_weights", i32 5, i32 11} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 3} diff --git a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-crash.ll b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-crash.ll index cfad18938461d..2d698d1c42b99 100644 --- a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-crash.ll +++ b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-crash.ll @@ -8,16 +8,11 @@ define i32 @wibble(i8* %arg, i8** %arg1) { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: [[BORG:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[BORG]], [[BB8:%.*]] ] -; CHECK-NEXT: [[BORG3:%.*]] = phi i32 [ 8, [[BB]] ], [ [[BORG10:%.*]], [[BB8]] ] +; CHECK-NEXT: [[BORG:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[BORG]], [[BB2]] ] +; CHECK-NEXT: [[BORG3:%.*]] = phi i32 [ 8, [[BB]] ], [ [[SPEC_SELECT:%.*]], [[BB2]] ] ; CHECK-NEXT: [[BORG4:%.*]] = tail call i32 @blam(i8* [[ARG:%.*]], i32 [[BORG]]) ; CHECK-NEXT: [[BORG5:%.*]] = icmp eq i32 [[BORG4]], 0 -; 
CHECK-NEXT: br i1 [[BORG5]], label [[BB8]], label [[BB6:%.*]] -; CHECK: bb6: -; CHECK-NEXT: [[BORG7:%.*]] = load i8*, i8** [[ARG1:%.*]], align 4 -; CHECK-NEXT: br label [[BB8]] -; CHECK: bb8: -; CHECK-NEXT: [[BORG10]] = phi i32 [ [[BORG4]], [[BB6]] ], [ [[BORG3]], [[BB2]] ] +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[BORG5]], i32 [[BORG3]], i32 [[BORG4]] ; CHECK-NEXT: [[BORG11:%.*]] = icmp ult i32 [[BORG]], 2 ; CHECK-NEXT: br i1 [[BORG11]], label [[BB2]], label [[BB12:%.*]] ; CHECK: bb12: diff --git a/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll b/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll deleted file mode 100644 index c75f53bc68583..0000000000000 --- a/llvm/test/Transforms/VectorCombine/AArch64/insert-shuffle-binop.ll +++ /dev/null @@ -1,216 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='vector-combine' -S %s | FileCheck %s - -target triple = "aarch64-none-eabi" - -define @fadd_vscale_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fadd fast %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> 
@fadd_fixed_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fadd fast <4 x float> %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_vscale_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float 
%1, i64 0 - %r = fsub fast %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_insertelt_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_insertelt_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fsub fast <4 x float> %broadcast.splatinsert2, %broadcast.splat - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fadd_vscale_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float 
%0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fadd fast %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fadd_fixed_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fadd fast <4 x float> %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_vscale_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: 
[[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %r = fsub fast %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_shuffle_insert_a_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_shuffle_insert_a_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLATINSERT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %r = fsub fast <4 x float> %broadcast.splat, %broadcast.splatinsert2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fadd_vscale_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_vscale_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: 
[[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fadd fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %broadcast.splat2 = shufflevector %broadcast.splatinsert2, poison, zeroinitializer - %r = fadd fast %broadcast.splat, %broadcast.splat2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fadd_fixed_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fadd_fixed_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fadd fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %broadcast.splat2 = shufflevector <4 x float> 
%broadcast.splatinsert2, <4 x float> poison, <4 x i32> zeroinitializer - %r = fadd fast <4 x float> %broadcast.splat, %broadcast.splat2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} - -define @fsub_vscale_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_vscale_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fsub fast [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector [[R]], poison, zeroinitializer -; CHECK-NEXT: ret [[TMP3]] -; - %broadcast.splatinsert = insertelement poison, float %0, i64 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer - %broadcast.splatinsert2 = insertelement poison, float %1, i64 0 - %broadcast.splat2 = shufflevector %broadcast.splatinsert2, poison, zeroinitializer - %r = fsub fast %broadcast.splat, %broadcast.splat2 - %3 = shufflevector %r, poison, zeroinitializer - ret %3 -} - -define <4 x float> @fsub_fixed_shuffle_insert_a_shuffle_insert_b(float %0, float %1) { -; CHECK-LABEL: @fsub_fixed_shuffle_insert_a_shuffle_insert_b( -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x 
float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = fsub fast <4 x float> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[R]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[TMP3]] -; - %broadcast.splatinsert = insertelement <4 x float> poison, float %0, i64 0 - %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer - %broadcast.splatinsert2 = insertelement <4 x float> poison, float %1, i64 0 - %broadcast.splat2 = shufflevector <4 x float> %broadcast.splatinsert2, <4 x float> poison, <4 x i32> zeroinitializer - %r = fsub fast <4 x float> %broadcast.splat, %broadcast.splat2 - %3 = shufflevector <4 x float> %r, <4 x float> poison, <4 x i32> zeroinitializer - ret <4 x float> %3 -} diff --git a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll index abc1a194bb8f2..40f610d3343c6 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-widening.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-widening.ll @@ -8,72 +8,72 @@ ; Here we know we can load 128 bits as per dereferenceability and alignment. ; We don't widen scalar loads per-se. -define <1 x float> @scalar(<1 x float>* align 16 dereferenceable(16) %p) { +define <1 x float> @scalar(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @scalar( -; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <1 x float> [[R]] ; - %r = load <1 x float>, <1 x float>* %p, align 16 + %r = load <1 x float>, ptr %p, align 16 ret <1 x float> %r } ; We don't widen single-element loads, these get scalarized. 
-define <1 x float> @vec_with_1elt(<1 x float>* align 16 dereferenceable(16) %p) { +define <1 x float> @vec_with_1elt(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_1elt( -; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <1 x float> [[R]] ; - %r = load <1 x float>, <1 x float>* %p, align 16 + %r = load <1 x float>, ptr %p, align 16 ret <1 x float> %r } -define <2 x float> @vec_with_2elts(<2 x float>* align 16 dereferenceable(16) %p) { +define <2 x float> @vec_with_2elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts( -; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x float> [[R]] ; - %r = load <2 x float>, <2 x float>* %p, align 16 + %r = load <2 x float>, ptr %p, align 16 ret <2 x float> %r } -define <3 x float> @vec_with_3elts(<3 x float>* align 16 dereferenceable(16) %p) { +define <3 x float> @vec_with_3elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_3elts( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 16 + %r = load <3 x float>, ptr %p, align 16 ret <3 x float> %r } ; Full-vector load. All good already. 
-define <4 x float> @vec_with_4elts(<4 x float>* align 16 dereferenceable(16) %p) { +define <4 x float> @vec_with_4elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_4elts( -; CHECK-NEXT: [[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <4 x float> [[R]] ; - %r = load <4 x float>, <4 x float>* %p, align 16 + %r = load <4 x float>, ptr %p, align 16 ret <4 x float> %r } ; We don't know we can load 256 bits though. -define <5 x float> @vec_with_5elts(<5 x float>* align 16 dereferenceable(16) %p) { +define <5 x float> @vec_with_5elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_5elts( -; CHECK-NEXT: [[R:%.*]] = load <5 x float>, <5 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <5 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <5 x float> [[R]] ; - %r = load <5 x float>, <5 x float>* %p, align 16 + %r = load <5 x float>, ptr %p, align 16 ret <5 x float> %r } ;------------------------------------------------------------------------------- ; We can load 128 bits, and the fact that it's underaligned isn't relevant. -define <3 x float> @vec_with_3elts_underaligned(<3 x float>* align 8 dereferenceable(16) %p) { +define <3 x float> @vec_with_3elts_underaligned(ptr align 8 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_3elts_underaligned( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 8 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 8 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 8 + %r = load <3 x float>, ptr %p, align 8 ret <3 x float> %r } @@ -81,145 +81,145 @@ define <3 x float> @vec_with_3elts_underaligned(<3 x float>* align 8 dereference ; FIXME: this should still get widened. 
define <3 x float> @vec_with_3elts_underdereferenceable(<3 x float>* align 16 dereferenceable(12) %p) { ; CHECK-LABEL: @vec_with_3elts_underdereferenceable( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 16 + %r = load <3 x float>, ptr %p, align 16 ret <3 x float> %r } ; We can't tell if we can load 128 bits. -define <3 x float> @vec_with_3elts_underaligned_underdereferenceable(<3 x float>* align 8 dereferenceable(12) %p) { +define <3 x float> @vec_with_3elts_underaligned_underdereferenceable(ptr align 8 dereferenceable(12) %p) { ; CHECK-LABEL: @vec_with_3elts_underaligned_underdereferenceable( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 8 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 8 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 8 + %r = load <3 x float>, ptr %p, align 8 ret <3 x float> %r } ;------------------------------------------------------------------------------- ; Here we know we can load 256 bits as per dereferenceability and alignment. 
-define <1 x float> @vec_with_1elt_256bits(<1 x float>* align 32 dereferenceable(32) %p) { +define <1 x float> @vec_with_1elt_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_1elt_256bits( -; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <1 x float> [[R]] ; - %r = load <1 x float>, <1 x float>* %p, align 32 + %r = load <1 x float>, ptr %p, align 32 ret <1 x float> %r } -define <2 x float> @vec_with_2elts_256bits(<2 x float>* align 32 dereferenceable(32) %p) { +define <2 x float> @vec_with_2elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_2elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <2 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <2 x float> [[R]] ; - %r = load <2 x float>, <2 x float>* %p, align 32 + %r = load <2 x float>, ptr %p, align 32 ret <2 x float> %r } -define <3 x float> @vec_with_3elts_256bits(<3 x float>* align 32 dereferenceable(32) %p) { +define <3 x float> @vec_with_3elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_3elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <3 x float> [[R]] ; - %r = load <3 x float>, <3 x float>* %p, align 32 + %r = load <3 x float>, ptr %p, align 32 ret <3 x float> %r } -define <4 x float> @vec_with_4elts_256bits(<4 x float>* align 32 dereferenceable(32) %p) { +define <4 x float> @vec_with_4elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_4elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <4 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <4 x float> [[R]] ; - %r = load <4 x float>, <4 x float>* %p, align 32 + %r = 
load <4 x float>, ptr %p, align 32 ret <4 x float> %r } -define <5 x float> @vec_with_5elts_256bits(<5 x float>* align 32 dereferenceable(32) %p) { +define <5 x float> @vec_with_5elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_5elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <5 x float>, <5 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <5 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <5 x float> [[R]] ; - %r = load <5 x float>, <5 x float>* %p, align 32 + %r = load <5 x float>, ptr %p, align 32 ret <5 x float> %r } -define <6 x float> @vec_with_6elts_256bits(<6 x float>* align 32 dereferenceable(32) %p) { +define <6 x float> @vec_with_6elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_6elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <6 x float>, <6 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <6 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <6 x float> [[R]] ; - %r = load <6 x float>, <6 x float>* %p, align 32 + %r = load <6 x float>, ptr %p, align 32 ret <6 x float> %r } -define <7 x float> @vec_with_7elts_256bits(<7 x float>* align 32 dereferenceable(32) %p) { +define <7 x float> @vec_with_7elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_7elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <7 x float>, <7 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <7 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <7 x float> [[R]] ; - %r = load <7 x float>, <7 x float>* %p, align 32 + %r = load <7 x float>, ptr %p, align 32 ret <7 x float> %r } ; Full-vector load. All good already. 
-define <8 x float> @vec_with_8elts_256bits(<8 x float>* align 32 dereferenceable(32) %p) { +define <8 x float> @vec_with_8elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_8elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <8 x float>, <8 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <8 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <8 x float> [[R]] ; - %r = load <8 x float>, <8 x float>* %p, align 32 + %r = load <8 x float>, ptr %p, align 32 ret <8 x float> %r } ; We can't tell if we can load more than 256 bits. -define <9 x float> @vec_with_9elts_256bits(<9 x float>* align 32 dereferenceable(32) %p) { +define <9 x float> @vec_with_9elts_256bits(ptr align 32 dereferenceable(32) %p) { ; CHECK-LABEL: @vec_with_9elts_256bits( -; CHECK-NEXT: [[R:%.*]] = load <9 x float>, <9 x float>* [[P:%.*]], align 32 +; CHECK-NEXT: [[R:%.*]] = load <9 x float>, ptr [[P:%.*]], align 32 ; CHECK-NEXT: ret <9 x float> [[R]] ; - %r = load <9 x float>, <9 x float>* %p, align 32 + %r = load <9 x float>, ptr %p, align 32 ret <9 x float> %r } ;------------------------------------------------------------------------------- ; Weird types we don't deal with -define <2 x i7> @vec_with_two_subbyte_elts(<2 x i7>* align 16 dereferenceable(16) %p) { +define <2 x i7> @vec_with_two_subbyte_elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_two_subbyte_elts( -; CHECK-NEXT: [[R:%.*]] = load <2 x i7>, <2 x i7>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i7>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i7> [[R]] ; - %r = load <2 x i7>, <2 x i7>* %p, align 16 + %r = load <2 x i7>, ptr %p, align 16 ret <2 x i7> %r } -define <2 x i9> @vec_with_two_nonbyte_sized_elts(<2 x i9>* align 16 dereferenceable(16) %p) { +define <2 x i9> @vec_with_two_nonbyte_sized_elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_two_nonbyte_sized_elts( -; CHECK-NEXT: [[R:%.*]] = load <2 x i9>, <2 x i9>* [[P:%.*]], align 16 +; 
CHECK-NEXT: [[R:%.*]] = load <2 x i9>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i9> [[R]] ; - %r = load <2 x i9>, <2 x i9>* %p, align 16 + %r = load <2 x i9>, ptr %p, align 16 ret <2 x i9> %r } -define <2 x i24> @vec_with_two_nonpoweroftwo_sized_elts(<2 x i24>* align 16 dereferenceable(16) %p) { +define <2 x i24> @vec_with_two_nonpoweroftwo_sized_elts(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_two_nonpoweroftwo_sized_elts( -; CHECK-NEXT: [[R:%.*]] = load <2 x i24>, <2 x i24>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i24>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i24> [[R]] ; - %r = load <2 x i24>, <2 x i24>* %p, align 16 + %r = load <2 x i24>, ptr %p, align 16 ret <2 x i24> %r } -define <2 x float> @vec_with_2elts_addressspace(<2 x float> addrspace(2)* align 16 dereferenceable(16) %p) { +define <2 x float> @vec_with_2elts_addressspace(ptr addrspace(2) align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts_addressspace( -; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float> addrspace(2)* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x float>, ptr addrspace(2) [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x float> [[R]] ; - %r = load <2 x float>, <2 x float> addrspace(2)* %p, align 16 + %r = load <2 x float>, ptr addrspace(2) %p, align 16 ret <2 x float> %r } @@ -227,27 +227,126 @@ define <2 x float> @vec_with_2elts_addressspace(<2 x float> addrspace(2)* align ; Widening these would change the legalized type, so leave them alone. 
-define <2 x i1> @vec_with_2elts_128bits_i1(<2 x i1>* align 16 dereferenceable(16) %p) { +define <2 x i1> @vec_with_2elts_128bits_i1(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts_128bits_i1( -; CHECK-NEXT: [[R:%.*]] = load <2 x i1>, <2 x i1>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i1>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %r = load <2 x i1>, <2 x i1>* %p, align 16 + %r = load <2 x i1>, ptr %p, align 16 ret <2 x i1> %r } -define <2 x i2> @vec_with_2elts_128bits_i2(<2 x i2>* align 16 dereferenceable(16) %p) { +define <2 x i2> @vec_with_2elts_128bits_i2(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts_128bits_i2( -; CHECK-NEXT: [[R:%.*]] = load <2 x i2>, <2 x i2>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i2>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i2> [[R]] ; - %r = load <2 x i2>, <2 x i2>* %p, align 16 + %r = load <2 x i2>, ptr %p, align 16 ret <2 x i2> %r } -define <2 x i4> @vec_with_2elts_128bits_i4(<2 x i4>* align 16 dereferenceable(16) %p) { +define <2 x i4> @vec_with_2elts_128bits_i4(ptr align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @vec_with_2elts_128bits_i4( -; CHECK-NEXT: [[R:%.*]] = load <2 x i4>, <2 x i4>* [[P:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = load <2 x i4>, ptr [[P:%.*]], align 16 ; CHECK-NEXT: ret <2 x i4> [[R]] ; - %r = load <2 x i4>, <2 x i4>* %p, align 16 + %r = load <2 x i4>, ptr %p, align 16 ret <2 x i4> %r } + +define <4 x float> @load_v1f32_v4f32(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v1f32_v4f32( +; CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[S:%.*]] = shufflevector <1 x float> [[L]], <1 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <1 x float>, ptr %p, align 16 + %s = shufflevector <1 x float> %l, <1 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <4 x float> @load_v2f32_v4f32(ptr align 16 dereferenceable(16) %p) { +; 
CHECK-LABEL: @load_v2f32_v4f32( +; CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <2 x float>, ptr %p, align 1 + %s = shufflevector <2 x float> %l, <2 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <4 x float> @load_v3f32_v4f32(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v3f32_v4f32( +; CHECK-NEXT: [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <3 x float>, ptr %p, align 1 + %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <4 x float> @load_v3f32_v4f32_wrong_mask(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v3f32_v4f32_wrong_mask( +; CHECK-NEXT: [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <3 x float>, ptr %p, align 1 + %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <4 x float> @load_v3f32_v4f32_not_deref(ptr dereferenceable(15) %p) { +; CHECK-LABEL: @load_v3f32_v4f32_not_deref( +; CHECK-NEXT: [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[S]] +; + %l = load <3 x float>, ptr %p, align 16 + %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> + ret <4 x float> %s +} + +define <8 x float> @load_v2f32_v8f32(ptr dereferenceable(32) %p) { +; CHECK-LABEL: @load_v2f32_v8f32( +; CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: ret <8 x 
float> [[S]] +; + %l = load <2 x float>, ptr %p, align 1 + %s = shufflevector <2 x float> %l, <2 x float> poison, <8 x i32> + ret <8 x float> %s +} + +define <4 x i32> @load_v2i32_v4i32(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v2i32_v4i32( +; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S]] +; + %l = load <2 x i32>, ptr %p, align 1 + %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> + ret <4 x i32> %s +} + +define <4 x i32> @load_v2i32_v4i32_non_canonical_mask(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v2i32_v4i32_non_canonical_mask( +; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S]] +; + %l = load <2 x i32>, ptr %p, align 1 + %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> + ret <4 x i32> %s +} + +define <4 x i32> @load_v2i32_v4i32_non_canonical_mask_commute(ptr dereferenceable(16) %p) { +; CHECK-LABEL: @load_v2i32_v4i32_non_canonical_mask_commute( +; CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1 +; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> poison, <2 x i32> [[L]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S]] +; + %l = load <2 x i32>, ptr %p, align 1 + %s = shufflevector <2 x i32> poison, <2 x i32> %l, <4 x i32> + ret <4 x i32> %s +} diff --git a/llvm/test/Verifier/alias.ll b/llvm/test/Verifier/alias.ll index e14406550dbbd..b7675a18c0ed1 100644 --- a/llvm/test/Verifier/alias.ll +++ b/llvm/test/Verifier/alias.ll @@ -1,4 +1,4 @@ -; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s --implicit-check-not=alias --implicit-check-not=Alias declare void @f() @@ -31,3 +31,15 @@ define available_externally void @f2() { @test3_c = alias i32, i32* @test3_b ; CHECK: Alias cannot point to 
an interposable alias ; CHECK-NEXT: i32* @test3_c + +@test4_a = available_externally global i32 42 +@test4_b = available_externally alias i32, i32* @test4_a +@test4_c = available_externally alias void(), void()* @f2 +@test4_d = available_externally alias i32, i32* @test4_b + +@test4_e = available_externally alias i32, i32* @test3_a +@test4_f = available_externally alias i32, inttoptr (i64 sub (i64 ptrtoint (i32* @test4_a to i64), i64 ptrtoint (i32* @test4_a to i64)) to i32*) +; CHECK: available_externally alias must point to available_externally global value +; CHECK-NEXT: i32* @test4_e +; CHECK: available_externally alias must point to available_externally global value +; CHECK-NEXT: i32* @test4_f diff --git a/llvm/test/Verifier/fp-intrinsics.ll b/llvm/test/Verifier/fp-intrinsics.ll index 744c215ce23e1..670acc58550e8 100644 --- a/llvm/test/Verifier/fp-intrinsics.ll +++ b/llvm/test/Verifier/fp-intrinsics.ll @@ -12,7 +12,7 @@ declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadat ; attached to the FP intrinsic. ; CHECK1: declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #[[ATTR:[0-9]+]] ; CHECK1: declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) #[[ATTR]] -; CHECK1: attributes #[[ATTR]] = { inaccessiblememonly nocallback nofree nosync nounwind willreturn } +; CHECK1: attributes #[[ATTR]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } ; Note: FP exceptions aren't usually caught through normal unwind mechanisms, ; but we may want to revisit this for asynchronous exception handling. 
define double @f1(double %a, double %b) #0 { diff --git a/llvm/test/Verifier/writeonly.ll b/llvm/test/Verifier/writeonly.ll deleted file mode 100644 index 0eeaebbc3a889..0000000000000 --- a/llvm/test/Verifier/writeonly.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s - -declare void @a() readnone writeonly -; CHECK: Attributes {{.*}} are incompatible - -declare void @b() readonly writeonly -; CHECK: Attributes {{.*}} are incompatible - -declare void @c(i32* readnone writeonly %p) -; CHECK: Attributes {{.*}} are incompatible - -declare void @d(i32* readonly writeonly %p) -; CHECK: Attributes {{.*}} are incompatible diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_attrs.ll.funcattrs.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_attrs.ll.funcattrs.expected index 4fff2d2836f15..0177b6f0306ff 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_attrs.ll.funcattrs.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/check_attrs.ll.funcattrs.expected @@ -6,7 +6,7 @@ %struct.ST = type { i32, double, %struct.RT } define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp { -; CHECK: Function Attrs: nofree norecurse nosync nounwind optsize readnone ssp willreturn uwtable +; CHECK: Function Attrs: nofree norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (%struct.ST* nofree readnone [[S:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected index 3f56cfed2b925..345a5223fedb4 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected +++ 
b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected @@ -249,8 +249,8 @@ attributes #3 = { nounwind } !61 = !{!"branch_weights", i32 1, i32 1048575} ;. ; CHECK: attributes #[[ATTR0]] = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR3]] = { nounwind } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) diff --git a/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/06-dwarf-full-logical-view.test b/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/06-dwarf-full-logical-view.test index 742b2e0c3b11e..e83592afd2ffa 100644 --- a/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/06-dwarf-full-logical-view.test +++ b/llvm/test/tools/llvm-debuginfo-analyzer/DWARF/06-dwarf-full-logical-view.test @@ -1,8 +1,5 @@ ; REQUIRES: x86-registered-target -; FIXME: Test failure https://reviews.llvm.org/D125783 -; UNSUPPORTED: arm - ; Test case 6 - Full logical view ; test.cpp diff --git a/llvm/test/tools/llvm-debuginfod-find/Inputs/capture_req.py b/llvm/test/tools/llvm-debuginfod-find/Inputs/capture_req.py new file mode 100644 index 0000000000000..56fa2d08a0897 --- /dev/null +++ b/llvm/test/tools/llvm-debuginfod-find/Inputs/capture_req.py @@ -0,0 +1,23 @@ +import http.server +import os +import subprocess +import sys +import threading + +class TrivialHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(501) + + def log_request(self, *args, **kwargs): + print(self.requestline) + print(self.headers) + +httpd = http.server.HTTPServer(('', 0), TrivialHandler) +port = httpd.socket.getsockname()[1] + +try: + t = threading.Thread(target=httpd.serve_forever).start() + os.environ['DEBUGINFOD_URLS'] =f'http://localhost:{port}' + subprocess.run(sys.argv[1:], capture_output = True) +finally: + httpd.shutdown() diff --git a/llvm/test/tools/llvm-debuginfod-find/Inputs/headers b/llvm/test/tools/llvm-debuginfod-find/Inputs/headers new file mode 100644 index 0000000000000..9f66ac2821c09 --- /dev/null +++ b/llvm/test/tools/llvm-debuginfod-find/Inputs/headers @@ 
-0,0 +1,12 @@ + + +A: +:A +: +A :B + +A:B +C: D +E:F +hi!$: j k + diff --git a/llvm/test/tools/llvm-debuginfod-find/headers.test b/llvm/test/tools/llvm-debuginfod-find/headers.test new file mode 100644 index 0000000000000..6fe814db51799 --- /dev/null +++ b/llvm/test/tools/llvm-debuginfod-find/headers.test @@ -0,0 +1,27 @@ +REQUIRES: curl + +RUN: %python %S/Inputs/capture_req.py llvm-debuginfod-find --debuginfo 0 \ +RUN: | FileCheck --check-prefix NO-HEADERS %s +RUN: DEBUGINFOD_HEADERS_FILE=bad %python %S/Inputs/capture_req.py \ +RUN: llvm-debuginfod-find --debuginfo 0 \ +RUN: | FileCheck --check-prefix NO-HEADERS %s +RUN: DEBUGINFOD_HEADERS_FILE=%S/Inputs/headers %python %S/Inputs/capture_req.py \ +RUN: llvm-debuginfod-find --debuginfo 0 \ +RUN: | FileCheck --check-prefix HEADERS %s +RUN: DEBUGINFOD_HEADERS_FILE=%S/Inputs/headers DEBUGINFOD_URLS=fake not llvm-debuginfod-find --debuginfo 0 2>&1 \ +RUN: | FileCheck --check-prefix ERR -DHEADER_FILE=%S/Inputs/headers %s + +NO-HEADERS: Accept: */* +NO-HEADERS-NOT: {{.}} + +HEADERS: Accept: */* +HEADERS-NEXT: A: B +HEADERS-NEXT: C: D +HEADERS-NEXT: E: F +HEADERS-NEXT: hi!$: j k +HEADERS-NOT: {{.}} + +ERR: warning: could not parse debuginfod header: [[HEADER_FILE]]:3 +ERR-NEXT: warning: could not parse debuginfod header: [[HEADER_FILE]]:4 +ERR-NEXT: warning: could not parse debuginfod header: [[HEADER_FILE]]:5 +ERR-NEXT: warning: could not parse debuginfod header: [[HEADER_FILE]]:6 diff --git a/llvm/test/tools/llvm-diff/loop.ll b/llvm/test/tools/llvm-diff/loop.ll new file mode 100644 index 0000000000000..8c50bc616d5cf --- /dev/null +++ b/llvm/test/tools/llvm-diff/loop.ll @@ -0,0 +1,49 @@ +; Diff file with itself +; Due to a current limitation in llvm-diff, a diff is reported here. +; RUN: not llvm-diff %s %s 2>&1 | FileCheck --check-prefix=SAME-FILE %s + +; Replace %newvar1 with %newvar2 in the phi node. This can only +; be detected to be different once BB1 has been processed. 
+; RUN: rm -f %t.ll +; RUN: cat %s | sed -e 's/ %newvar1, %BB1 / %newvar2, %BB1 /' > %t.ll +; RUN: not llvm-diff %s %t.ll 2>&1 | FileCheck --check-prefix DIFFERENT-VAR %s + +; SAME-FILE: in function func: +; SAME-FILE-NEXT: in block %BB0: +; SAME-FILE-NEXT: > %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ] +; SAME-FILE-NEXT: > %cnd = icmp eq i32 %var, 0 +; SAME-FILE-NEXT: > br i1 %cnd, label %BB1, label %END +; SAME-FILE-NEXT: < %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ] +; SAME-FILE-NEXT: < %cnd = icmp eq i32 %var, 0 +; SAME-FILE-NEXT: < br i1 %cnd, label %BB1, label %END + +; DIFFERENT-VAR: in function func: +; DIFFERENT-VAR-NEXT: in block %BB0: +; DIFFERENT-VAR-NEXT: > %var = phi i32 [ 0, %ENTRY ], [ %newvar2, %BB1 ] +; DIFFERENT-VAR-NEXT: > %cnd = icmp eq i32 %var, 0 +; DIFFERENT-VAR-NEXT: > br i1 %cnd, label %BB1, label %END +; DIFFERENT-VAR-NEXT: < %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ] +; DIFFERENT-VAR-NEXT: < %cnd = icmp eq i32 %var, 0 +; DIFFERENT-VAR-NEXT: < br i1 %cnd, label %BB1, label %END +define i32 @func() { +ENTRY: + br label %BB0 + +BB0: + ; When diffing this phi node, we need to detect whether + ; %newvar1 is equivalent, which is not known until BB1 has been processed. + %var = phi i32 [ 0, %ENTRY ], [ %newvar1, %BB1 ] + %cnd = icmp eq i32 %var, 0 + br i1 %cnd, label %BB1, label %END + +BB1: + %newvar1 = add i32 %var, 1 + %newvar2 = add i32 %var, 2 + br label %BB0 + +END: + ; Equivalence of the ret depends on equivalence of %var. + ; Even if %var differs, we do not report a diff here, because + ; this is an indirect diff caused by another diff. 
+ ret i32 %var +} diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s index 39a99e8a12408..291b8cd43cb4e 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse1.s @@ -218,14 +218,14 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %rcx # CHECK-NEXT: 1 14 14.00 divps %xmm0, %xmm2 # CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s index 7a8d4b03a9356..904454a547077 100644 --- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s @@ -440,8 +440,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %ecx, 
%xmm2 @@ -458,8 +458,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %rcx # CHECK-NEXT: 1 22 22.00 divpd %xmm0, %xmm2 # CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s index b60a873b59d4c..20828a798b297 100644 --- a/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/resources-x86_64.s @@ -1396,26 +1396,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 0.50 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 1.00 movsbw %al, %di # CHECK-NEXT: 1 1 1.00 movzbw %al, %di -# CHECK-NEXT: 1 5 1.50 * movsbw (%rax), %di -# CHECK-NEXT: 1 5 1.50 * movzbw (%rax), %di +# CHECK-NEXT: 1 5 1.00 * movsbw (%rax), %di +# CHECK-NEXT: 1 5 1.00 * movzbw (%rax), %di # CHECK-NEXT: 1 1 1.00 movsbl %al, %edi # CHECK-NEXT: 1 1 1.00 movzbl %al, %edi -# CHECK-NEXT: 1 5 1.50 * movsbl (%rax), %edi -# CHECK-NEXT: 1 5 1.50 * movzbl (%rax), %edi +# CHECK-NEXT: 1 5 1.00 * movsbl (%rax), %edi +# CHECK-NEXT: 1 5 1.00 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 1.00 movsbq %al, %rdi # CHECK-NEXT: 1 1 1.00 movzbq %al, %rdi -# CHECK-NEXT: 1 5 1.50 * movsbq (%rax), %rdi -# CHECK-NEXT: 1 5 1.50 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 1.00 movswl %ax, %edi # CHECK-NEXT: 1 1 1.00 movzwl %ax, %edi -# CHECK-NEXT: 1 5 1.50 * movswl (%rax), %edi -# CHECK-NEXT: 1 5 1.50 * movzwl (%rax), %edi +# CHECK-NEXT: 1 5 1.00 * movswl (%rax), %edi +# CHECK-NEXT: 1 5 1.00 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 1.00 movswq %ax, %rdi # 
CHECK-NEXT: 1 1 1.00 movzwq %ax, %rdi -# CHECK-NEXT: 1 5 1.50 * movswq (%rax), %rdi -# CHECK-NEXT: 1 5 1.50 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movswq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 1.00 movslq %eax, %rdi -# CHECK-NEXT: 1 5 1.50 * movslq (%rax), %rdi +# CHECK-NEXT: 1 5 1.00 * movslq (%rax), %rdi # CHECK-NEXT: 1 4 4.00 mulb %dil # CHECK-NEXT: 1 8 4.00 * mulb (%rax) # CHECK-NEXT: 2 4 5.00 mulw %si @@ -1968,7 +1968,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] -# CHECK-NEXT: 769.50 769.50 - - 246.00 1815.50 2220.50 - - - - - - - - - - - - 616.50 616.50 136.00 306.00 +# CHECK-NEXT: 764.00 764.00 - - 246.00 1804.50 2209.50 - - - - - - - - - - - - 611.00 611.00 136.00 306.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: @@ -2338,26 +2338,26 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movsbw %al, %di # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzbw %al, %di -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movsbw (%rax), %di -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzbw (%rax), %di +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movsbw (%rax), %di +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzbw (%rax), %di # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movsbl %al, %edi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzbl %al, %edi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - 
movsbl (%rax), %edi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzbl (%rax), %edi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movsbl (%rax), %edi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzbl (%rax), %edi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movsbq %al, %rdi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movsbq (%rax), %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzbq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movsbq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzbq (%rax), %rdi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movswl %ax, %edi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzwl %ax, %edi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movswl (%rax), %edi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzwl (%rax), %edi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movswl (%rax), %edi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzwl (%rax), %edi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movswq %ax, %rdi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movswq (%rax), %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movzwq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movswq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movzwq (%rax), %rdi # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - 
- - - - - - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 1.50 1.50 - - - 1.00 1.00 - - - - - - - - - - - - 1.50 1.50 - - movslq (%rax), %rdi +# CHECK-NEXT: 1.00 1.00 - - - - - - - - - - - - - - - - - 1.00 1.00 - - movslq (%rax), %rdi # CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - 4.00 - mulb %dil # CHECK-NEXT: 1.50 1.50 - - - - 1.00 - - - - - - - - - - - - 1.50 1.50 4.00 - mulb (%rax) # CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - - 5.00 - mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index bc98992d516bc..27c6120d84987 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1123,13 +1123,13 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 2 9 1.00 * vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 12 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm0, %ymm2 @@ -1159,7 +1159,7 @@ vzeroupper # CHECK-NEXT: 3 9 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 9 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, 
%xmm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 426.25 2.25 12.67 +# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 430.25 2.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1833,13 +1833,13 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %ymm0, %ymm2 @@ -1869,7 +1869,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), 
%xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s index b0c89f017731d..6d750008119ad 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-x86_64.s @@ -1396,26 +1396,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 0.50 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 0.50 movsbw %al, %di # CHECK-NEXT: 1 1 0.50 movzbw %al, %di -# CHECK-NEXT: 1 4 1.00 * movsbw (%rax), %di -# CHECK-NEXT: 1 4 1.00 * movzbw (%rax), %di +# CHECK-NEXT: 1 3 1.00 * movsbw (%rax), %di +# CHECK-NEXT: 1 3 1.00 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.50 movsbl %al, %edi # CHECK-NEXT: 1 1 0.50 movzbl %al, %edi -# CHECK-NEXT: 1 4 1.00 * movsbl (%rax), %edi -# CHECK-NEXT: 1 4 1.00 * movzbl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movsbl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.50 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.50 movzbq %al, %rdi -# CHECK-NEXT: 1 4 1.00 * movsbq (%rax), %rdi -# CHECK-NEXT: 1 4 1.00 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movswl %ax, %edi # CHECK-NEXT: 1 1 0.50 movzwl %ax, %edi -# CHECK-NEXT: 1 4 1.00 * movswl (%rax), %edi -# CHECK-NEXT: 1 4 1.00 * movzwl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movswl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.50 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.50 movzwq %ax, %rdi -# CHECK-NEXT: 1 4 1.00 * movswq (%rax), %rdi -# CHECK-NEXT: 1 4 1.00 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movswq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movslq %eax, %rdi -# CHECK-NEXT: 1 4 1.00 * movslq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movslq (%rax), %rdi # CHECK-NEXT: 1 3 1.00 mulb %dil # CHECK-NEXT: 1 6 1.00 * mulb (%rax) # CHECK-NEXT: 3 3 3.00 mulw %si 
@@ -1959,7 +1959,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 722.50 772.50 380.00 - - - - 992.00 80.00 893.00 - - - - +# CHECK-NEXT: 717.00 767.00 380.00 - - - - 992.00 80.00 893.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -2329,26 +2329,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsbw %al, %di # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzbw %al, %di -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movsbw (%rax), %di -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzbw (%rax), %di +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movsbw (%rax), %di +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzbw (%rax), %di # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsbl %al, %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzbl %al, %edi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movsbl (%rax), %edi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzbl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movsbl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzbl (%rax), %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movsbq %al, %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movsbq (%rax), %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzbq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movsbq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzbq (%rax), %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movswl %ax, %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzwl %ax, %edi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movswl (%rax), %edi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 
- - - - - - movzwl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movswl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzwl (%rax), %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movswq %ax, %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movswq (%rax), %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movzwq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movswq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movzwq (%rax), %rdi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - - - movslq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 - - - - - - movslq (%rax), %rdi # CHECK-NEXT: - 1.00 - - - - - - 1.00 - - - - - mulb %dil # CHECK-NEXT: - 1.00 - - - - - 1.00 1.00 - - - - - mulb (%rax) # CHECK-NEXT: - 1.00 - - - - - - 3.00 - - - - - mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s index e500f8a91f6a3..9282df15b5f0b 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s @@ -127,6 +127,56 @@ vcvtdq2ps %zmm16, %zmm19 {z}{k1} vcvtdq2ps (%rax), %zmm19 {z}{k1} vcvtdq2ps (%rax){1to16}, %zmm19 {z}{k1} +vcvtpd2dq %zmm16, %ymm19 +vcvtpd2dq (%rax), %ymm19 +vcvtpd2dq (%rax){1to8}, %ymm19 +vcvtpd2dq %zmm16, %ymm19 {k1} +vcvtpd2dq (%rax), %ymm19 {k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {k1} +vcvtpd2dq %zmm16, %ymm19 {z}{k1} +vcvtpd2dq (%rax), %ymm19 {z}{k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2udq %zmm16, %ymm19 +vcvtpd2udq (%rax), %ymm19 +vcvtpd2udq (%rax){1to8}, %ymm19 +vcvtpd2udq %zmm16, %ymm19 {k1} +vcvtpd2udq (%rax), %ymm19 {k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {k1} +vcvtpd2udq %zmm16, %ymm19 {z}{k1} +vcvtpd2udq (%rax), %ymm19 {z}{k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dq %zmm16, %ymm19 
+vcvttpd2dq (%rax), %ymm19 +vcvttpd2dq (%rax){1to8}, %ymm19 +vcvttpd2dq %zmm16, %ymm19 {k1} +vcvttpd2dq (%rax), %ymm19 {k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {k1} +vcvttpd2dq %zmm16, %ymm19 {z}{k1} +vcvttpd2dq (%rax), %ymm19 {z}{k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udq %zmm16, %ymm19 +vcvttpd2udq (%rax), %ymm19 +vcvttpd2udq (%rax){1to8}, %ymm19 +vcvttpd2udq %zmm16, %ymm19 {k1} +vcvttpd2udq (%rax), %ymm19 {k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {k1} +vcvttpd2udq %zmm16, %ymm19 {z}{k1} +vcvttpd2udq (%rax), %ymm19 {z}{k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2ps %zmm16, %ymm19 +vcvtpd2ps (%rax), %ymm19 +vcvtpd2ps (%rax){1to8}, %ymm19 +vcvtpd2ps %zmm16, %ymm19 {k1} +vcvtpd2ps (%rax), %ymm19 {k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {k1} +vcvtpd2ps %zmm16, %ymm19 {z}{k1} +vcvtpd2ps (%rax), %ymm19 {z}{k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {z}{k1} + vcvtps2dq %zmm16, %zmm19 vcvtps2dq (%rax), %zmm19 vcvtps2dq (%rax){1to16}, %zmm19 @@ -147,6 +197,56 @@ vcvttps2dq %zmm16, %zmm19 {z}{k1} vcvttps2dq (%rax), %zmm19 {z}{k1} vcvttps2dq (%rax){1to16}, %zmm19 {z}{k1} +vcvtps2pd %ymm16, %zmm19 +vcvtps2pd (%rax), %zmm19 +vcvtps2pd (%rax){1to8}, %zmm19 +vcvtps2pd %ymm16, %zmm19 {k1} +vcvtps2pd (%rax), %zmm19 {k1} +vcvtps2pd (%rax){1to8}, %zmm19 {k1} +vcvtps2pd %ymm16, %zmm19 {z}{k1} +vcvtps2pd (%rax), %zmm19 {z}{k1} +vcvtps2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtps2udq %zmm16, %zmm19 +vcvtps2udq (%rax), %zmm19 +vcvtps2udq (%rax){1to16}, %zmm19 +vcvtps2udq %zmm16, %zmm19 {k1} +vcvtps2udq (%rax), %zmm19 {k1} +vcvtps2udq (%rax){1to16}, %zmm19 {k1} +vcvtps2udq %zmm16, %zmm19 {z}{k1} +vcvtps2udq (%rax), %zmm19 {z}{k1} +vcvtps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvttps2udq %zmm16, %zmm19 +vcvttps2udq (%rax), %zmm19 +vcvttps2udq (%rax){1to16}, %zmm19 +vcvttps2udq %zmm16, %zmm19 {k1} +vcvttps2udq (%rax), %zmm19 {k1} +vcvttps2udq (%rax){1to16}, %zmm19 {k1} +vcvttps2udq %zmm16, %zmm19 {z}{k1} +vcvttps2udq (%rax), %zmm19 {z}{k1} +vcvttps2udq (%rax){1to16}, %zmm19 
{z}{k1} + +vcvtudq2pd %ymm16, %zmm19 +vcvtudq2pd (%rax), %zmm19 +vcvtudq2pd (%rax){1to8}, %zmm19 +vcvtudq2pd %ymm16, %zmm19 {k1} +vcvtudq2pd (%rax), %zmm19 {k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {k1} +vcvtudq2pd %ymm16, %zmm19 {z}{k1} +vcvtudq2pd (%rax), %zmm19 {z}{k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtudq2ps %zmm16, %zmm19 +vcvtudq2ps (%rax), %zmm19 +vcvtudq2ps (%rax){1to16}, %zmm19 +vcvtudq2ps %zmm16, %zmm19 {k1} +vcvtudq2ps (%rax), %zmm19 {k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {k1} +vcvtudq2ps %zmm16, %zmm19 {z}{k1} +vcvtudq2ps (%rax), %zmm19 {z}{k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {z}{k1} + vdivpd %zmm16, %zmm17, %zmm19 vdivpd (%rax), %zmm17, %zmm19 vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -992,6 +1092,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq 
(%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 3 1.00 vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax){1to16}, %zmm19 @@ -1010,6 +1155,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 
10 1.00 * vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2 4 1.00 vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# 
CHECK-NEXT: 2 4 1.00 vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 1.00 vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 3 45 44.00 vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 4 52 44.00 * vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 4 52 44.00 * vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -1667,7 +1857,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1506.00 144.67 201.67 16.00 456.67 245.50 245.50 +# CHECK-NEXT: - 1506.00 153.67 282.67 16.00 513.67 275.50 275.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1779,6 +1969,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq 
(%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 
vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to16}, %zmm19 @@ -1797,6 +2032,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %zmm19 {%k1} {z} +# 
CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 
0.50 vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - 44.00 2.50 - - 0.50 - - vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - 44.00 2.50 - - 0.50 0.50 0.50 vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - 44.00 2.50 - - 0.50 0.50 0.50 vdivpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s index 20dc9e2fca618..84852a2a8b156 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s @@ -178,6 +178,206 @@ vcvtdq2ps %ymm16, %ymm19 {z}{k1} vcvtdq2ps (%rax), %ymm19 {z}{k1} vcvtdq2ps (%rax){1to8}, %ymm19 {z}{k1} +vcvtpd2dqy %ymm16, %xmm19 +vcvtpd2dqy (%rax), %xmm19 +vcvtpd2dqy (%rax){1to4}, %xmm19 +vcvtpd2dqy %ymm16, %xmm19 {k1} +vcvtpd2dqy (%rax), %xmm19 {k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2dqy %ymm16, %xmm19 {z}{k1} +vcvtpd2dqy (%rax), %xmm19 {z}{k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2dqx %xmm16, %xmm19 +vcvtpd2dqx (%rax), %xmm19 +vcvtpd2dqx (%rax){1to2}, %xmm19 +vcvtpd2dqx %xmm16, %xmm19 {k1} +vcvtpd2dqx (%rax), %xmm19 {k1} +vcvtpd2dqx (%rax){1to2},%xmm19 {k1} +vcvtpd2dqx %xmm16, %xmm19 {z}{k1} +vcvtpd2dqx (%rax), %xmm19 {z}{k1} +vcvtpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2psy %ymm16, %xmm19 +vcvtpd2psy (%rax), 
%xmm19 +vcvtpd2psy (%rax){1to4}, %xmm19 +vcvtpd2psy %ymm16, %xmm19 {k1} +vcvtpd2psy (%rax), %xmm19 {k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {k1} +vcvtpd2psy %ymm16, %xmm19 {z}{k1} +vcvtpd2psy (%rax), %xmm19 {z}{k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2psx %xmm16, %xmm19 +vcvtpd2psx (%rax), %xmm19 +vcvtpd2psx (%rax){1to2}, %xmm19 +vcvtpd2psx %xmm16, %xmm19 {k1} +vcvtpd2psx (%rax), %xmm19 {k1} +vcvtpd2psx (%rax){1to2},%xmm19 {k1} +vcvtpd2psx %xmm16, %xmm19 {z}{k1} +vcvtpd2psx (%rax), %xmm19 {z}{k1} +vcvtpd2psx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2udqy %ymm16, %xmm19 +vcvtpd2udqy (%rax), %xmm19 +vcvtpd2udqy (%rax){1to4}, %xmm19 +vcvtpd2udqy %ymm16, %xmm19 {k1} +vcvtpd2udqy (%rax), %xmm19 {k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2udqy %ymm16, %xmm19 {z}{k1} +vcvtpd2udqy (%rax), %xmm19 {z}{k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2udqx %xmm16, %xmm19 +vcvtpd2udqx (%rax), %xmm19 +vcvtpd2udqx (%rax){1to2}, %xmm19 +vcvtpd2udqx %xmm16, %xmm19 {k1} +vcvtpd2udqx (%rax), %xmm19 {k1} +vcvtpd2udqx (%rax){1to2},%xmm19 {k1} +vcvtpd2udqx %xmm16, %xmm19 {z}{k1} +vcvtpd2udqx (%rax), %xmm19 {z}{k1} +vcvtpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2dq %xmm16, %xmm19 +vcvtps2dq (%rax), %xmm19 +vcvtps2dq (%rax){1to4}, %xmm19 +vcvtps2dq %xmm16, %xmm19 {k1} +vcvtps2dq (%rax), %xmm19 {k1} +vcvtps2dq (%rax){1to4},%xmm19 {k1} +vcvtps2dq %xmm16, %xmm19 {z}{k1} +vcvtps2dq (%rax), %xmm19 {z}{k1} +vcvtps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2dq %ymm16, %ymm19 +vcvtps2dq (%rax), %ymm19 +vcvtps2dq (%rax){1to8}, %ymm19 +vcvtps2dq %ymm16,%ymm19 {k1} +vcvtps2dq (%rax),%ymm19 {k1} +vcvtps2dq (%rax){1to8}, %ymm19 {k1} +vcvtps2dq %ymm16, %ymm19 {z}{k1} +vcvtps2dq (%rax), %ymm19 {z}{k1} +vcvtps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtps2pd %xmm16, %xmm19 +vcvtps2pd (%rax), %xmm19 +vcvtps2pd (%rax){1to2}, %xmm19 +vcvtps2pd %xmm16, %xmm19 {k1} +vcvtps2pd (%rax), %xmm19 {k1} +vcvtps2pd (%rax){1to2},%xmm19 {k1} +vcvtps2pd %xmm16, %xmm19 {z}{k1} +vcvtps2pd (%rax), 
%xmm19 {z}{k1} +vcvtps2pd (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2pd %xmm16, %ymm19 +vcvtps2pd (%rax), %ymm19 +vcvtps2pd (%rax){1to4}, %ymm19 +vcvtps2pd %xmm16,%ymm19 {k1} +vcvtps2pd (%rax),%ymm19 {k1} +vcvtps2pd (%rax){1to4}, %ymm19 {k1} +vcvtps2pd %xmm16, %ymm19 {z}{k1} +vcvtps2pd (%rax), %ymm19 {z}{k1} +vcvtps2pd (%rax){1to4}, %ymm19 {z}{k1} + +vcvtps2udq %xmm16, %xmm19 +vcvtps2udq (%rax), %xmm19 +vcvtps2udq (%rax){1to4}, %xmm19 +vcvtps2udq %xmm16, %xmm19 {k1} +vcvtps2udq (%rax), %xmm19 {k1} +vcvtps2udq (%rax){1to4},%xmm19 {k1} +vcvtps2udq %xmm16, %xmm19 {z}{k1} +vcvtps2udq (%rax), %xmm19 {z}{k1} +vcvtps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2udq %ymm16, %ymm19 +vcvtps2udq (%rax), %ymm19 +vcvtps2udq (%rax){1to8}, %ymm19 +vcvtps2udq %ymm16,%ymm19 {k1} +vcvtps2udq (%rax),%ymm19 {k1} +vcvtps2udq (%rax){1to8}, %ymm19 {k1} +vcvtps2udq %ymm16, %ymm19 {z}{k1} +vcvtps2udq (%rax), %ymm19 {z}{k1} +vcvtps2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dqy %ymm16, %xmm19 +vcvttpd2dqy (%rax), %xmm19 +vcvttpd2dqy (%rax){1to4}, %xmm19 +vcvttpd2dqy %ymm16, %xmm19 {k1} +vcvttpd2dqy (%rax), %xmm19 {k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2dqy %ymm16, %xmm19 {z}{k1} +vcvttpd2dqy (%rax), %xmm19 {z}{k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2dqx %xmm16, %xmm19 +vcvttpd2dqx (%rax), %xmm19 +vcvttpd2dqx (%rax){1to2}, %xmm19 +vcvttpd2dqx %xmm16, %xmm19 {k1} +vcvttpd2dqx (%rax), %xmm19 {k1} +vcvttpd2dqx (%rax){1to2},%xmm19 {k1} +vcvttpd2dqx %xmm16, %xmm19 {z}{k1} +vcvttpd2dqx (%rax), %xmm19 {z}{k1} +vcvttpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2dq %xmm16, %xmm19 +vcvttps2dq (%rax), %xmm19 +vcvttps2dq (%rax){1to4}, %xmm19 +vcvttps2dq %xmm16, %xmm19 {k1} +vcvttps2dq (%rax), %xmm19 {k1} +vcvttps2dq (%rax){1to4},%xmm19 {k1} +vcvttps2dq %xmm16, %xmm19 {z}{k1} +vcvttps2dq (%rax), %xmm19 {z}{k1} +vcvttps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2dq %ymm16, %ymm19 +vcvttps2dq (%rax), %ymm19 +vcvttps2dq (%rax){1to8}, %ymm19 +vcvttps2dq %ymm16,%ymm19 {k1} 
+vcvttps2dq (%rax),%ymm19 {k1} +vcvttps2dq (%rax){1to8}, %ymm19 {k1} +vcvttps2dq %ymm16, %ymm19 {z}{k1} +vcvttps2dq (%rax), %ymm19 {z}{k1} +vcvttps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udqy %ymm16, %xmm19 +vcvttpd2udqy (%rax), %xmm19 +vcvttpd2udqy (%rax){1to4}, %xmm19 +vcvttpd2udqy %ymm16, %xmm19 {k1} +vcvttpd2udqy (%rax), %xmm19 {k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2udqy %ymm16, %xmm19 {z}{k1} +vcvttpd2udqy (%rax), %xmm19 {z}{k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2udqx %xmm16, %xmm19 +vcvttpd2udqx (%rax), %xmm19 +vcvttpd2udqx (%rax){1to2}, %xmm19 +vcvttpd2udqx %xmm16, %xmm19 {k1} +vcvttpd2udqx (%rax), %xmm19 {k1} +vcvttpd2udqx (%rax){1to2},%xmm19 {k1} +vcvttpd2udqx %xmm16, %xmm19 {z}{k1} +vcvttpd2udqx (%rax), %xmm19 {z}{k1} +vcvttpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2udq %xmm16, %xmm19 +vcvttps2udq (%rax), %xmm19 +vcvttps2udq (%rax){1to4}, %xmm19 +vcvttps2udq %xmm16, %xmm19 {k1} +vcvttps2udq (%rax), %xmm19 {k1} +vcvttps2udq (%rax){1to4},%xmm19 {k1} +vcvttps2udq %xmm16, %xmm19 {z}{k1} +vcvttps2udq (%rax), %xmm19 {z}{k1} +vcvttps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2udq %ymm16, %ymm19 +vcvttps2udq (%rax), %ymm19 +vcvttps2udq (%rax){1to8}, %ymm19 +vcvttps2udq %ymm16,%ymm19 {k1} +vcvttps2udq (%rax),%ymm19 {k1} +vcvttps2udq (%rax){1to8}, %ymm19 {k1} +vcvttps2udq %ymm16, %ymm19 {z}{k1} +vcvttps2udq (%rax), %ymm19 {z}{k1} +vcvttps2udq (%rax){1to8}, %ymm19 {z}{k1} + vdivpd %xmm16, %xmm17, %xmm19 vdivpd (%rax), %xmm17, %xmm19 vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -1545,6 +1745,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %ymm16, 
%xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udqy 
(%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2dq 
(%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvtps2udq %ymm16, %ymm19 {%k1} {z} 
+# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), 
%ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 4 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 10 1.00 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 9 1.00 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), 
%ymm19 +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 1.00 vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 10 1.00 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 22 22.00 vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 28 22.00 * vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 28 22.00 * vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -2620,7 +3000,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1935.00 206.00 363.50 32.00 642.50 390.50 390.50 +# CHECK-NEXT: - 1935.00 224.00 525.50 32.00 738.50 450.50 450.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -2780,6 +3160,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 
1.00 - 1.00 - - vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 
1.00 - - vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq 
%ymm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 
0.50 0.50 vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to4}, 
%xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - 1.00 - - vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq 
(%rax){1to8}, %ymm19 +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - - 1.00 - - - - vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - 22.00 1.00 - - - - - vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 vdivpd (%rax){1to2}, %xmm17, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s index 39a99e8a12408..291b8cd43cb4e 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse1.s @@ -218,14 +218,14 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %rcx # CHECK-NEXT: 1 14 14.00 divps %xmm0, %xmm2 # CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s index 
7a8d4b03a9356..904454a547077 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s @@ -440,8 +440,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %ecx, %xmm2 @@ -458,8 +458,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %rcx # CHECK-NEXT: 1 22 22.00 divpd %xmm0, %xmm2 # CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index 997ea6156a0ae..ea7d251ffccef 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1123,13 +1123,13 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 2 10 1.00 * vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 2 4 1.00 vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 10 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 
2 6 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 12 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 9 1.00 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtps2dq %ymm0, %ymm2 @@ -1159,7 +1159,7 @@ vzeroupper # CHECK-NEXT: 3 9 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 9 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 3 10 1.00 * vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2 @@ -1269,7 +1269,7 @@ vzeroupper # CHECK-NEXT: 1 1 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 # CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 429.58 2.25 12.67 +# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 433.58 2.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1833,13 +1833,13 @@ vzeroupper # CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 # 
CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvtps2dq %ymm0, %ymm2 @@ -1869,7 +1869,7 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - - - vcvttps2dq %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s index 17203584ea3fe..a79a47724f603 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s @@ -211,7 +211,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtpi2ps %mm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtps2pi %xmm0, %mm2 -# CHECK-NEXT: 2 8 1.00 * cvtps2pi (%rax), %mm2 +# CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2 # CHECK-NEXT: 2 4 1.00 cvtsi2ss %ecx, %xmm2 # CHECK-NEXT: 3 5 2.00 cvtsi2ss %rcx, %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 @@ -221,7 +221,7 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx # CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx # 
CHECK-NEXT: 2 4 1.00 cvttps2pi %xmm0, %mm2 -# CHECK-NEXT: 2 8 1.00 * cvttps2pi (%rax), %mm2 +# CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2 # CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 2 4 1.00 cvttss2si %xmm0, %rcx # CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s index 6832defc50e59..7085718405a44 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse3.s @@ -58,7 +58,7 @@ mwait # CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movshdup %xmm0, %xmm2 # CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 movsldup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s index 2219cf0ebfc88..383ddac8d16d0 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s @@ -1127,9 +1127,9 @@ vzeroupper # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 3 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2 @@ -1269,7 +1269,7 @@ vzeroupper # CHECK-NEXT: 1 2 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 2 1 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 vmovddup %xmm0, %xmm2 -# 
CHECK-NEXT: 1 5 0.50 * vmovddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 # CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 @@ -1738,7 +1738,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 126.00 318.08 228.08 173.17 173.17 34.00 305.58 6.25 12.67 - - +# CHECK-NEXT: - 126.00 319.25 228.25 173.17 173.17 34.00 305.25 6.25 12.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1838,9 +1838,9 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - - - vcvtpd2psx (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2ps %ymm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %xmm2 @@ -1854,8 +1854,8 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 - - - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - vcvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - vcvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 
- - - 1.00 - - - - vcvtsd2ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtsi2sd %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s index 13327794d2b4e..eb370576f2c13 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s @@ -127,6 +127,56 @@ vcvtdq2ps %zmm16, %zmm19 {z}{k1} vcvtdq2ps (%rax), %zmm19 {z}{k1} vcvtdq2ps (%rax){1to16}, %zmm19 {z}{k1} +vcvtpd2dq %zmm16, %ymm19 +vcvtpd2dq (%rax), %ymm19 +vcvtpd2dq (%rax){1to8}, %ymm19 +vcvtpd2dq %zmm16, %ymm19 {k1} +vcvtpd2dq (%rax), %ymm19 {k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {k1} +vcvtpd2dq %zmm16, %ymm19 {z}{k1} +vcvtpd2dq (%rax), %ymm19 {z}{k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2udq %zmm16, %ymm19 +vcvtpd2udq (%rax), %ymm19 +vcvtpd2udq (%rax){1to8}, %ymm19 +vcvtpd2udq %zmm16, %ymm19 {k1} +vcvtpd2udq (%rax), %ymm19 {k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {k1} +vcvtpd2udq %zmm16, %ymm19 {z}{k1} +vcvtpd2udq (%rax), %ymm19 {z}{k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dq %zmm16, %ymm19 +vcvttpd2dq (%rax), %ymm19 +vcvttpd2dq (%rax){1to8}, %ymm19 +vcvttpd2dq %zmm16, %ymm19 {k1} +vcvttpd2dq (%rax), %ymm19 {k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {k1} +vcvttpd2dq %zmm16, %ymm19 {z}{k1} +vcvttpd2dq (%rax), %ymm19 {z}{k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udq %zmm16, %ymm19 +vcvttpd2udq (%rax), %ymm19 +vcvttpd2udq (%rax){1to8}, %ymm19 +vcvttpd2udq %zmm16, %ymm19 {k1} +vcvttpd2udq (%rax), %ymm19 {k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {k1} +vcvttpd2udq %zmm16, %ymm19 {z}{k1} +vcvttpd2udq (%rax), %ymm19 {z}{k1} +vcvttpd2udq (%rax){1to8}, 
%ymm19 {z}{k1} + +vcvtpd2ps %zmm16, %ymm19 +vcvtpd2ps (%rax), %ymm19 +vcvtpd2ps (%rax){1to8}, %ymm19 +vcvtpd2ps %zmm16, %ymm19 {k1} +vcvtpd2ps (%rax), %ymm19 {k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {k1} +vcvtpd2ps %zmm16, %ymm19 {z}{k1} +vcvtpd2ps (%rax), %ymm19 {z}{k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {z}{k1} + vcvtps2dq %zmm16, %zmm19 vcvtps2dq (%rax), %zmm19 vcvtps2dq (%rax){1to16}, %zmm19 @@ -147,6 +197,56 @@ vcvttps2dq %zmm16, %zmm19 {z}{k1} vcvttps2dq (%rax), %zmm19 {z}{k1} vcvttps2dq (%rax){1to16}, %zmm19 {z}{k1} +vcvtps2pd %ymm16, %zmm19 +vcvtps2pd (%rax), %zmm19 +vcvtps2pd (%rax){1to8}, %zmm19 +vcvtps2pd %ymm16, %zmm19 {k1} +vcvtps2pd (%rax), %zmm19 {k1} +vcvtps2pd (%rax){1to8}, %zmm19 {k1} +vcvtps2pd %ymm16, %zmm19 {z}{k1} +vcvtps2pd (%rax), %zmm19 {z}{k1} +vcvtps2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtps2udq %zmm16, %zmm19 +vcvtps2udq (%rax), %zmm19 +vcvtps2udq (%rax){1to16}, %zmm19 +vcvtps2udq %zmm16, %zmm19 {k1} +vcvtps2udq (%rax), %zmm19 {k1} +vcvtps2udq (%rax){1to16}, %zmm19 {k1} +vcvtps2udq %zmm16, %zmm19 {z}{k1} +vcvtps2udq (%rax), %zmm19 {z}{k1} +vcvtps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvttps2udq %zmm16, %zmm19 +vcvttps2udq (%rax), %zmm19 +vcvttps2udq (%rax){1to16}, %zmm19 +vcvttps2udq %zmm16, %zmm19 {k1} +vcvttps2udq (%rax), %zmm19 {k1} +vcvttps2udq (%rax){1to16}, %zmm19 {k1} +vcvttps2udq %zmm16, %zmm19 {z}{k1} +vcvttps2udq (%rax), %zmm19 {z}{k1} +vcvttps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvtudq2pd %ymm16, %zmm19 +vcvtudq2pd (%rax), %zmm19 +vcvtudq2pd (%rax){1to8}, %zmm19 +vcvtudq2pd %ymm16, %zmm19 {k1} +vcvtudq2pd (%rax), %zmm19 {k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {k1} +vcvtudq2pd %ymm16, %zmm19 {z}{k1} +vcvtudq2pd (%rax), %zmm19 {z}{k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtudq2ps %zmm16, %zmm19 +vcvtudq2ps (%rax), %zmm19 +vcvtudq2ps (%rax){1to16}, %zmm19 +vcvtudq2ps %zmm16, %zmm19 {k1} +vcvtudq2ps (%rax), %zmm19 {k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {k1} +vcvtudq2ps %zmm16, %zmm19 {z}{k1} +vcvtudq2ps (%rax), %zmm19 {z}{k1} 
+vcvtudq2ps (%rax){1to16}, %zmm19 {z}{k1} + vdivpd %zmm16, %zmm17, %zmm19 vdivpd (%rax), %zmm17, %zmm19 vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -992,6 +1092,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# 
CHECK-NEXT: 2 7 1.00 vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to16}, %zmm19 @@ -1010,6 +1155,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 0.50 vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2 3 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 2 3 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 
{%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 0.50 vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 0.50 vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1 4 0.50 vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 
vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 3 23 16.00 vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 4 30 16.00 * vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 4 30 16.00 * vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -1671,7 +1861,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 612.00 240.67 49.67 278.83 278.83 16.00 559.67 2.00 5.33 - - +# CHECK-NEXT: - 612.00 282.17 67.67 308.83 308.83 16.00 631.17 2.00 5.33 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -1783,6 +1973,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - 
- - - vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 
0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to16}, %zmm19 @@ -1801,6 +2036,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - - - 
vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - - - vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax){1to8}, %zmm19 +# 
CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - - - vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - 16.00 2.00 - - - - 1.00 - - - - vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - 16.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - 16.00 2.00 - 0.50 0.50 - 1.00 - - - - vdivpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s index 39c8b9921dbe7..8ee8f9e6c72f3 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512vl.s @@ -178,6 +178,206 @@ vcvtdq2ps %ymm16, %ymm19 {z}{k1} vcvtdq2ps (%rax), %ymm19 {z}{k1} vcvtdq2ps (%rax){1to8}, %ymm19 {z}{k1} +vcvtpd2dqy 
%ymm16, %xmm19 +vcvtpd2dqy (%rax), %xmm19 +vcvtpd2dqy (%rax){1to4}, %xmm19 +vcvtpd2dqy %ymm16, %xmm19 {k1} +vcvtpd2dqy (%rax), %xmm19 {k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2dqy %ymm16, %xmm19 {z}{k1} +vcvtpd2dqy (%rax), %xmm19 {z}{k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2dqx %xmm16, %xmm19 +vcvtpd2dqx (%rax), %xmm19 +vcvtpd2dqx (%rax){1to2}, %xmm19 +vcvtpd2dqx %xmm16, %xmm19 {k1} +vcvtpd2dqx (%rax), %xmm19 {k1} +vcvtpd2dqx (%rax){1to2},%xmm19 {k1} +vcvtpd2dqx %xmm16, %xmm19 {z}{k1} +vcvtpd2dqx (%rax), %xmm19 {z}{k1} +vcvtpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2psy %ymm16, %xmm19 +vcvtpd2psy (%rax), %xmm19 +vcvtpd2psy (%rax){1to4}, %xmm19 +vcvtpd2psy %ymm16, %xmm19 {k1} +vcvtpd2psy (%rax), %xmm19 {k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {k1} +vcvtpd2psy %ymm16, %xmm19 {z}{k1} +vcvtpd2psy (%rax), %xmm19 {z}{k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2psx %xmm16, %xmm19 +vcvtpd2psx (%rax), %xmm19 +vcvtpd2psx (%rax){1to2}, %xmm19 +vcvtpd2psx %xmm16, %xmm19 {k1} +vcvtpd2psx (%rax), %xmm19 {k1} +vcvtpd2psx (%rax){1to2},%xmm19 {k1} +vcvtpd2psx %xmm16, %xmm19 {z}{k1} +vcvtpd2psx (%rax), %xmm19 {z}{k1} +vcvtpd2psx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2udqy %ymm16, %xmm19 +vcvtpd2udqy (%rax), %xmm19 +vcvtpd2udqy (%rax){1to4}, %xmm19 +vcvtpd2udqy %ymm16, %xmm19 {k1} +vcvtpd2udqy (%rax), %xmm19 {k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2udqy %ymm16, %xmm19 {z}{k1} +vcvtpd2udqy (%rax), %xmm19 {z}{k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2udqx %xmm16, %xmm19 +vcvtpd2udqx (%rax), %xmm19 +vcvtpd2udqx (%rax){1to2}, %xmm19 +vcvtpd2udqx %xmm16, %xmm19 {k1} +vcvtpd2udqx (%rax), %xmm19 {k1} +vcvtpd2udqx (%rax){1to2},%xmm19 {k1} +vcvtpd2udqx %xmm16, %xmm19 {z}{k1} +vcvtpd2udqx (%rax), %xmm19 {z}{k1} +vcvtpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2dq %xmm16, %xmm19 +vcvtps2dq (%rax), %xmm19 +vcvtps2dq (%rax){1to4}, %xmm19 +vcvtps2dq %xmm16, %xmm19 {k1} +vcvtps2dq (%rax), %xmm19 {k1} +vcvtps2dq (%rax){1to4},%xmm19 {k1} 
+vcvtps2dq %xmm16, %xmm19 {z}{k1} +vcvtps2dq (%rax), %xmm19 {z}{k1} +vcvtps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2dq %ymm16, %ymm19 +vcvtps2dq (%rax), %ymm19 +vcvtps2dq (%rax){1to8}, %ymm19 +vcvtps2dq %ymm16,%ymm19 {k1} +vcvtps2dq (%rax),%ymm19 {k1} +vcvtps2dq (%rax){1to8}, %ymm19 {k1} +vcvtps2dq %ymm16, %ymm19 {z}{k1} +vcvtps2dq (%rax), %ymm19 {z}{k1} +vcvtps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtps2pd %xmm16, %xmm19 +vcvtps2pd (%rax), %xmm19 +vcvtps2pd (%rax){1to2}, %xmm19 +vcvtps2pd %xmm16, %xmm19 {k1} +vcvtps2pd (%rax), %xmm19 {k1} +vcvtps2pd (%rax){1to2},%xmm19 {k1} +vcvtps2pd %xmm16, %xmm19 {z}{k1} +vcvtps2pd (%rax), %xmm19 {z}{k1} +vcvtps2pd (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2pd %xmm16, %ymm19 +vcvtps2pd (%rax), %ymm19 +vcvtps2pd (%rax){1to4}, %ymm19 +vcvtps2pd %xmm16,%ymm19 {k1} +vcvtps2pd (%rax),%ymm19 {k1} +vcvtps2pd (%rax){1to4}, %ymm19 {k1} +vcvtps2pd %xmm16, %ymm19 {z}{k1} +vcvtps2pd (%rax), %ymm19 {z}{k1} +vcvtps2pd (%rax){1to4}, %ymm19 {z}{k1} + +vcvtps2udq %xmm16, %xmm19 +vcvtps2udq (%rax), %xmm19 +vcvtps2udq (%rax){1to4}, %xmm19 +vcvtps2udq %xmm16, %xmm19 {k1} +vcvtps2udq (%rax), %xmm19 {k1} +vcvtps2udq (%rax){1to4},%xmm19 {k1} +vcvtps2udq %xmm16, %xmm19 {z}{k1} +vcvtps2udq (%rax), %xmm19 {z}{k1} +vcvtps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2udq %ymm16, %ymm19 +vcvtps2udq (%rax), %ymm19 +vcvtps2udq (%rax){1to8}, %ymm19 +vcvtps2udq %ymm16,%ymm19 {k1} +vcvtps2udq (%rax),%ymm19 {k1} +vcvtps2udq (%rax){1to8}, %ymm19 {k1} +vcvtps2udq %ymm16, %ymm19 {z}{k1} +vcvtps2udq (%rax), %ymm19 {z}{k1} +vcvtps2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dqy %ymm16, %xmm19 +vcvttpd2dqy (%rax), %xmm19 +vcvttpd2dqy (%rax){1to4}, %xmm19 +vcvttpd2dqy %ymm16, %xmm19 {k1} +vcvttpd2dqy (%rax), %xmm19 {k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2dqy %ymm16, %xmm19 {z}{k1} +vcvttpd2dqy (%rax), %xmm19 {z}{k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2dqx %xmm16, %xmm19 +vcvttpd2dqx (%rax), %xmm19 +vcvttpd2dqx (%rax){1to2}, %xmm19 +vcvttpd2dqx 
%xmm16, %xmm19 {k1} +vcvttpd2dqx (%rax), %xmm19 {k1} +vcvttpd2dqx (%rax){1to2},%xmm19 {k1} +vcvttpd2dqx %xmm16, %xmm19 {z}{k1} +vcvttpd2dqx (%rax), %xmm19 {z}{k1} +vcvttpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2dq %xmm16, %xmm19 +vcvttps2dq (%rax), %xmm19 +vcvttps2dq (%rax){1to4}, %xmm19 +vcvttps2dq %xmm16, %xmm19 {k1} +vcvttps2dq (%rax), %xmm19 {k1} +vcvttps2dq (%rax){1to4},%xmm19 {k1} +vcvttps2dq %xmm16, %xmm19 {z}{k1} +vcvttps2dq (%rax), %xmm19 {z}{k1} +vcvttps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2dq %ymm16, %ymm19 +vcvttps2dq (%rax), %ymm19 +vcvttps2dq (%rax){1to8}, %ymm19 +vcvttps2dq %ymm16,%ymm19 {k1} +vcvttps2dq (%rax),%ymm19 {k1} +vcvttps2dq (%rax){1to8}, %ymm19 {k1} +vcvttps2dq %ymm16, %ymm19 {z}{k1} +vcvttps2dq (%rax), %ymm19 {z}{k1} +vcvttps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udqy %ymm16, %xmm19 +vcvttpd2udqy (%rax), %xmm19 +vcvttpd2udqy (%rax){1to4}, %xmm19 +vcvttpd2udqy %ymm16, %xmm19 {k1} +vcvttpd2udqy (%rax), %xmm19 {k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2udqy %ymm16, %xmm19 {z}{k1} +vcvttpd2udqy (%rax), %xmm19 {z}{k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2udqx %xmm16, %xmm19 +vcvttpd2udqx (%rax), %xmm19 +vcvttpd2udqx (%rax){1to2}, %xmm19 +vcvttpd2udqx %xmm16, %xmm19 {k1} +vcvttpd2udqx (%rax), %xmm19 {k1} +vcvttpd2udqx (%rax){1to2},%xmm19 {k1} +vcvttpd2udqx %xmm16, %xmm19 {z}{k1} +vcvttpd2udqx (%rax), %xmm19 {z}{k1} +vcvttpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2udq %xmm16, %xmm19 +vcvttps2udq (%rax), %xmm19 +vcvttps2udq (%rax){1to4}, %xmm19 +vcvttps2udq %xmm16, %xmm19 {k1} +vcvttps2udq (%rax), %xmm19 {k1} +vcvttps2udq (%rax){1to4},%xmm19 {k1} +vcvttps2udq %xmm16, %xmm19 {z}{k1} +vcvttps2udq (%rax), %xmm19 {z}{k1} +vcvttps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2udq %ymm16, %ymm19 +vcvttps2udq (%rax), %ymm19 +vcvttps2udq (%rax){1to8}, %ymm19 +vcvttps2udq %ymm16,%ymm19 {k1} +vcvttps2udq (%rax),%ymm19 {k1} +vcvttps2udq (%rax){1to8}, %ymm19 {k1} +vcvttps2udq %ymm16, %ymm19 {z}{k1} +vcvttps2udq 
(%rax), %ymm19 {z}{k1} +vcvttps2udq (%rax){1to8}, %ymm19 {z}{k1} + vdivpd %xmm16, %xmm17, %xmm19 vdivpd (%rax), %xmm17, %xmm19 vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -1545,6 +1745,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# 
CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 11 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 
10 0.50 * vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * 
vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 +# 
CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 
4 0.50 vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 14 4.00 vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 20 4.00 * vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 20 4.00 * vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -2624,7 +3004,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 423.00 289.33 264.33 407.17 407.17 32.00 684.33 4.00 10.67 - - +# CHECK-NEXT: - 423.00 364.33 339.33 467.17 467.17 32.00 762.33 4.00 10.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2784,6 +3164,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtdq2ps 
(%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to4}, 
%xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - 
vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 
0.33 - - - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 
0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqx (%rax), %xmm19 
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq 
(%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 
0.50 - 0.33 - - - - vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - 4.00 1.00 - - - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - - - vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - - - vdivpd (%rax){1to2}, %xmm17, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s index cfdf730d86adf..4720831bf5f3d 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse2.s @@ -691,7 +691,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: - 40.00 103.92 95.92 63.50 63.50 14.00 83.92 2.25 5.00 - - +# CHECK-NEXT: - 40.00 104.58 96.58 63.50 
63.50 14.00 82.58 2.25 5.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -718,8 +718,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - cvtpd2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - - - cvtps2dq %xmm0, %xmm2 @@ -730,8 +730,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - - - cvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - cvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - - - cvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtsd2ss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - - - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - - - cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - - - cvtsi2sdl (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s index e09b9e0f757b3..4d1942450ec63 100644 --- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse3.s @@ -58,7 +58,7 @@ mwait # 
CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 -# CHECK-NEXT: 1 5 0.50 * movddup (%rax), %xmm2 +# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movshdup %xmm0, %xmm2 # CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movsldup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-x86_64.s index f48ac11746092..1491da0f17a83 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-x86_64.s @@ -1396,26 +1396,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 1.00 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 0.50 movsbw %al, %di # CHECK-NEXT: 1 1 0.50 movzbw %al, %di -# CHECK-NEXT: 1 4 1.00 * movsbw (%rax), %di -# CHECK-NEXT: 1 4 1.00 * movzbw (%rax), %di +# CHECK-NEXT: 1 3 1.00 * movsbw (%rax), %di +# CHECK-NEXT: 1 3 1.00 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.50 movsbl %al, %edi # CHECK-NEXT: 1 1 0.50 movzbl %al, %edi -# CHECK-NEXT: 1 4 1.00 * movsbl (%rax), %edi -# CHECK-NEXT: 1 4 1.00 * movzbl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movsbl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.50 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.50 movzbq %al, %rdi -# CHECK-NEXT: 1 4 1.00 * movsbq (%rax), %rdi -# CHECK-NEXT: 1 4 1.00 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movswl %ax, %edi # CHECK-NEXT: 1 1 0.50 movzwl %ax, %edi -# CHECK-NEXT: 1 4 1.00 * movswl (%rax), %edi -# CHECK-NEXT: 1 4 1.00 * movzwl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movswl (%rax), %edi +# CHECK-NEXT: 1 3 1.00 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.50 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.50 movzwq %ax, %rdi -# CHECK-NEXT: 1 4 1.00 * movswq (%rax), %rdi -# CHECK-NEXT: 1 4 1.00 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movswq (%rax), %rdi +# 
CHECK-NEXT: 1 3 1.00 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.50 movslq %eax, %rdi -# CHECK-NEXT: 1 4 1.00 * movslq (%rax), %rdi +# CHECK-NEXT: 1 3 1.00 * movslq (%rax), %rdi # CHECK-NEXT: 3 5 5.00 mulb %dil # CHECK-NEXT: 3 8 5.00 * mulb (%rax) # CHECK-NEXT: 4 5 5.00 mulw %si @@ -1953,7 +1953,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: 400.00 - - 49.00 - 660.00 598.00 835.00 +# CHECK-NEXT: 400.00 - - 49.00 - 654.50 592.50 835.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -2323,26 +2323,26 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - 1.00 - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: - - - - - 0.50 0.50 - movsbw %al, %di # CHECK-NEXT: - - - - - 0.50 0.50 - movzbw %al, %di -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movsbw (%rax), %di -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzbw (%rax), %di +# CHECK-NEXT: - - - - - - - 1.00 movsbw (%rax), %di +# CHECK-NEXT: - - - - - - - 1.00 movzbw (%rax), %di # CHECK-NEXT: - - - - - 0.50 0.50 - movsbl %al, %edi # CHECK-NEXT: - - - - - 0.50 0.50 - movzbl %al, %edi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movsbl (%rax), %edi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzbl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 movsbl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 movzbl (%rax), %edi # CHECK-NEXT: - - - - - 0.50 0.50 - movsbq %al, %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - movzbq %al, %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movsbq (%rax), %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzbq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movsbq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movzbq (%rax), %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - movswl %ax, %edi # CHECK-NEXT: - - - - - 0.50 0.50 - movzwl %ax, %edi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movswl (%rax), %edi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzwl (%rax), %edi +# CHECK-NEXT: - - - - - - - 1.00 movswl (%rax), %edi +# 
CHECK-NEXT: - - - - - - - 1.00 movzwl (%rax), %edi # CHECK-NEXT: - - - - - 0.50 0.50 - movswq %ax, %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - movzwq %ax, %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movswq (%rax), %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movzwq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movswq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movzwq (%rax), %rdi # CHECK-NEXT: - - - - - 0.50 0.50 - movslq %eax, %rdi -# CHECK-NEXT: - - - - - 0.50 0.50 1.00 movslq (%rax), %rdi +# CHECK-NEXT: - - - - - - - 1.00 movslq (%rax), %rdi # CHECK-NEXT: - - - - - - 5.00 - mulb %dil # CHECK-NEXT: - - - - - - 5.00 1.00 mulb (%rax) # CHECK-NEXT: - - - - - - 5.00 - mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s index 28915f49790d9..02b5810cda417 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s @@ -218,14 +218,14 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttss2si (%rax), %rcx # CHECK-NEXT: 1 14 14.00 divps %xmm0, %xmm2 # CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2 # CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s 
b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s index 2b6255c697fa1..c3b8b7389df4c 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s @@ -440,8 +440,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 * cvtps2pd (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 2 4 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: 2 4 1.00 cvtsi2sd %ecx, %xmm2 @@ -458,8 +458,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %rcx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %ecx -# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %rcx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %ecx +# CHECK-NEXT: 3 10 1.00 * cvttsd2si (%rax), %rcx # CHECK-NEXT: 1 22 22.00 divpd %xmm0, %xmm2 # CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2 # CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s index eb70e8be3bb6a..f28cd83cf8d83 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s @@ -1127,9 +1127,9 @@ vzeroupper # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 11 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 13 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 
1 4 0.50 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2 @@ -1155,7 +1155,7 @@ vzeroupper # CHECK-NEXT: 2 5 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %ecx -# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %rcx +# CHECK-NEXT: 3 7 1.00 vcvtss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm0, %xmm2 @@ -1170,7 +1170,7 @@ vzeroupper # CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %rcx -# CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 vcvttss2si %xmm0, %ecx # CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %rcx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 126.00 332.08 203.08 173.17 173.17 34.00 323.58 5.25 12.67 +# CHECK-NEXT: - 126.00 333.42 202.42 173.17 173.17 34.00 324.92 5.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1836,10 +1836,10 @@ vzeroupper # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy 
(%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2 @@ -1852,7 +1852,7 @@ vzeroupper # CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2 @@ -1865,7 +1865,7 @@ vzeroupper # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtss2si %xmm0, %ecx -# CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvtss2si %xmm0, %rcx +# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvtss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2 @@ -1880,7 +1880,7 @@ vzeroupper # CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvttsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvttsd2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvttsd2si (%rax), %rcx -# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvttss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.50 0.50 - - - - - - vcvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - vcvttss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - vcvttss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 
0.50 0.50 - - - - vcvttss2si (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s index e93bda0e38c07..e25e56ce84184 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse1.s @@ -217,12 +217,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %ecx -# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %rcx +# CHECK-NEXT: 3 7 1.00 cvtss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 0.50 * cvttps2pi (%rax), %mm2 -# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx # CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx @@ -360,12 +360,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssq (%rax), %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtss2si %xmm0, %ecx -# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtss2si %xmm0, %rcx +# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvtss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvttps2pi (%rax), %mm2 -# CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.50 0.50 - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.50 0.50 - - - 1.00 - - cvttss2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvttss2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - 1.00 - - 
cvttss2si (%rax), %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s index 52ad5dbcdb25f..082346c542b47 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse2.s @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 40.00 110.92 78.92 63.50 63.50 14.00 96.92 2.25 5.00 +# CHECK-NEXT: - 40.00 111.25 79.25 63.50 63.50 14.00 96.25 2.25 5.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -716,7 +716,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 @@ -728,7 +728,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.50 0.50 - - - - - - cvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.50 0.50 0.50 0.50 - - - - cvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %rcx, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s 
b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s index 57565547002af..1c4939e32c3b1 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s @@ -1127,9 +1127,9 @@ vzeroupper # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 3 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 126.00 317.42 196.42 173.17 173.17 34.00 337.92 6.25 12.67 +# CHECK-NEXT: - 126.00 318.58 196.58 173.17 173.17 34.00 337.58 6.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1836,9 +1836,9 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %ymm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 
- - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm2 @@ -1852,8 +1852,8 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %ecx, %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sd %rcx, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2sdl (%rax), %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s index 127be91c0deae..2420edc5b7080 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -127,6 +127,56 @@ vcvtdq2ps %zmm16, %zmm19 {z}{k1} vcvtdq2ps (%rax), %zmm19 {z}{k1} vcvtdq2ps (%rax){1to16}, %zmm19 {z}{k1} +vcvtpd2dq %zmm16, %ymm19 +vcvtpd2dq (%rax), %ymm19 +vcvtpd2dq (%rax){1to8}, %ymm19 +vcvtpd2dq %zmm16, %ymm19 {k1} +vcvtpd2dq (%rax), %ymm19 {k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {k1} +vcvtpd2dq %zmm16, %ymm19 {z}{k1} +vcvtpd2dq (%rax), %ymm19 {z}{k1} +vcvtpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2udq %zmm16, %ymm19 +vcvtpd2udq (%rax), %ymm19 +vcvtpd2udq (%rax){1to8}, %ymm19 +vcvtpd2udq %zmm16, %ymm19 {k1} +vcvtpd2udq (%rax), %ymm19 {k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {k1} +vcvtpd2udq %zmm16, %ymm19 {z}{k1} +vcvtpd2udq (%rax), %ymm19 {z}{k1} +vcvtpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dq %zmm16, %ymm19 +vcvttpd2dq (%rax), %ymm19 +vcvttpd2dq (%rax){1to8}, %ymm19 +vcvttpd2dq %zmm16, %ymm19 {k1} +vcvttpd2dq (%rax), 
%ymm19 {k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {k1} +vcvttpd2dq %zmm16, %ymm19 {z}{k1} +vcvttpd2dq (%rax), %ymm19 {z}{k1} +vcvttpd2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udq %zmm16, %ymm19 +vcvttpd2udq (%rax), %ymm19 +vcvttpd2udq (%rax){1to8}, %ymm19 +vcvttpd2udq %zmm16, %ymm19 {k1} +vcvttpd2udq (%rax), %ymm19 {k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {k1} +vcvttpd2udq %zmm16, %ymm19 {z}{k1} +vcvttpd2udq (%rax), %ymm19 {z}{k1} +vcvttpd2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtpd2ps %zmm16, %ymm19 +vcvtpd2ps (%rax), %ymm19 +vcvtpd2ps (%rax){1to8}, %ymm19 +vcvtpd2ps %zmm16, %ymm19 {k1} +vcvtpd2ps (%rax), %ymm19 {k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {k1} +vcvtpd2ps %zmm16, %ymm19 {z}{k1} +vcvtpd2ps (%rax), %ymm19 {z}{k1} +vcvtpd2ps (%rax){1to8}, %ymm19 {z}{k1} + vcvtps2dq %zmm16, %zmm19 vcvtps2dq (%rax), %zmm19 vcvtps2dq (%rax){1to16}, %zmm19 @@ -147,6 +197,56 @@ vcvttps2dq %zmm16, %zmm19 {z}{k1} vcvttps2dq (%rax), %zmm19 {z}{k1} vcvttps2dq (%rax){1to16}, %zmm19 {z}{k1} +vcvtps2pd %ymm16, %zmm19 +vcvtps2pd (%rax), %zmm19 +vcvtps2pd (%rax){1to8}, %zmm19 +vcvtps2pd %ymm16, %zmm19 {k1} +vcvtps2pd (%rax), %zmm19 {k1} +vcvtps2pd (%rax){1to8}, %zmm19 {k1} +vcvtps2pd %ymm16, %zmm19 {z}{k1} +vcvtps2pd (%rax), %zmm19 {z}{k1} +vcvtps2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtps2udq %zmm16, %zmm19 +vcvtps2udq (%rax), %zmm19 +vcvtps2udq (%rax){1to16}, %zmm19 +vcvtps2udq %zmm16, %zmm19 {k1} +vcvtps2udq (%rax), %zmm19 {k1} +vcvtps2udq (%rax){1to16}, %zmm19 {k1} +vcvtps2udq %zmm16, %zmm19 {z}{k1} +vcvtps2udq (%rax), %zmm19 {z}{k1} +vcvtps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvttps2udq %zmm16, %zmm19 +vcvttps2udq (%rax), %zmm19 +vcvttps2udq (%rax){1to16}, %zmm19 +vcvttps2udq %zmm16, %zmm19 {k1} +vcvttps2udq (%rax), %zmm19 {k1} +vcvttps2udq (%rax){1to16}, %zmm19 {k1} +vcvttps2udq %zmm16, %zmm19 {z}{k1} +vcvttps2udq (%rax), %zmm19 {z}{k1} +vcvttps2udq (%rax){1to16}, %zmm19 {z}{k1} + +vcvtudq2pd %ymm16, %zmm19 +vcvtudq2pd (%rax), %zmm19 +vcvtudq2pd (%rax){1to8}, %zmm19 +vcvtudq2pd %ymm16, 
%zmm19 {k1} +vcvtudq2pd (%rax), %zmm19 {k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {k1} +vcvtudq2pd %ymm16, %zmm19 {z}{k1} +vcvtudq2pd (%rax), %zmm19 {z}{k1} +vcvtudq2pd (%rax){1to8}, %zmm19 {z}{k1} + +vcvtudq2ps %zmm16, %zmm19 +vcvtudq2ps (%rax), %zmm19 +vcvtudq2ps (%rax){1to16}, %zmm19 +vcvtudq2ps %zmm16, %zmm19 {k1} +vcvtudq2ps (%rax), %zmm19 {k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {k1} +vcvtudq2ps %zmm16, %zmm19 {z}{k1} +vcvtudq2ps (%rax), %zmm19 {z}{k1} +vcvtudq2ps (%rax){1to16}, %zmm19 {z}{k1} + vdivpd %zmm16, %zmm17, %zmm19 vdivpd (%rax), %zmm17, %zmm19 vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -992,6 +1092,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq 
(%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 3 0.50 vcvttpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 4 0.50 vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to16}, %zmm19 @@ -1010,6 +1155,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 0.50 vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 
11 0.50 * vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 2 3 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 2 3 1.00 vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 0.50 vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 3 0.50 vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 3 0.50 vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: 1 4 0.50 vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax), %zmm19 
{%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: 3 23 16.00 vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 4 30 16.00 * vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 4 30 16.00 * vdivpd (%rax){1to8}, %zmm17, %zmm19 @@ -1669,7 +1859,7 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 240.67 49.67 278.83 278.83 16.00 559.67 2.00 5.33 +# CHECK-NEXT: - 612.00 277.67 67.67 308.83 308.83 16.00 635.67 2.00 5.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1781,6 +1971,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtdq2ps %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} 
+# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtpd2udq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvttpd2dq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttpd2dq %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttpd2dq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvttpd2udq %zmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttpd2udq %zmm16, %ymm19 {%k1} 
+# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttpd2udq %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvttpd2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2ps %zmm16, %ymm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax), %ymm19 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2ps %zmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtpd2ps %zmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 1.50 - - vcvtpd2ps (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtps2dq %zmm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to16}, %zmm19 @@ -1799,6 +2034,51 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttps2dq %zmm16, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtps2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 
0.50 - 0.33 - - vcvtps2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttps2udq %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttps2udq %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvttps2udq %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to16}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtudq2pd %ymm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax), %zmm19 +# 
CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2pd %ymm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2pd %ymm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2pd (%rax){1to8}, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2ps %zmm16, %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax), %zmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2ps %zmm16, %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax), %zmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} +# CHECK-NEXT: - - 0.50 - - - - 0.50 - - vcvtudq2ps %zmm16, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax), %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtudq2ps (%rax){1to16}, %zmm19 {%k1} {z} # CHECK-NEXT: - 16.00 2.00 - - - - 1.00 - - vdivpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - 16.00 2.00 - 0.50 0.50 - 1.00 - - vdivpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - 16.00 2.00 - 0.50 0.50 - 1.00 - - vdivpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s index 848f1a54e82dd..30cc195dcda73 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s @@ -178,6 +178,206 @@ vcvtdq2ps %ymm16, %ymm19 {z}{k1} vcvtdq2ps (%rax), %ymm19 {z}{k1} vcvtdq2ps (%rax){1to8}, %ymm19 {z}{k1} 
+vcvtpd2dqy %ymm16, %xmm19 +vcvtpd2dqy (%rax), %xmm19 +vcvtpd2dqy (%rax){1to4}, %xmm19 +vcvtpd2dqy %ymm16, %xmm19 {k1} +vcvtpd2dqy (%rax), %xmm19 {k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2dqy %ymm16, %xmm19 {z}{k1} +vcvtpd2dqy (%rax), %xmm19 {z}{k1} +vcvtpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2dqx %xmm16, %xmm19 +vcvtpd2dqx (%rax), %xmm19 +vcvtpd2dqx (%rax){1to2}, %xmm19 +vcvtpd2dqx %xmm16, %xmm19 {k1} +vcvtpd2dqx (%rax), %xmm19 {k1} +vcvtpd2dqx (%rax){1to2},%xmm19 {k1} +vcvtpd2dqx %xmm16, %xmm19 {z}{k1} +vcvtpd2dqx (%rax), %xmm19 {z}{k1} +vcvtpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2psy %ymm16, %xmm19 +vcvtpd2psy (%rax), %xmm19 +vcvtpd2psy (%rax){1to4}, %xmm19 +vcvtpd2psy %ymm16, %xmm19 {k1} +vcvtpd2psy (%rax), %xmm19 {k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {k1} +vcvtpd2psy %ymm16, %xmm19 {z}{k1} +vcvtpd2psy (%rax), %xmm19 {z}{k1} +vcvtpd2psy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2psx %xmm16, %xmm19 +vcvtpd2psx (%rax), %xmm19 +vcvtpd2psx (%rax){1to2}, %xmm19 +vcvtpd2psx %xmm16, %xmm19 {k1} +vcvtpd2psx (%rax), %xmm19 {k1} +vcvtpd2psx (%rax){1to2},%xmm19 {k1} +vcvtpd2psx %xmm16, %xmm19 {z}{k1} +vcvtpd2psx (%rax), %xmm19 {z}{k1} +vcvtpd2psx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtpd2udqy %ymm16, %xmm19 +vcvtpd2udqy (%rax), %xmm19 +vcvtpd2udqy (%rax){1to4}, %xmm19 +vcvtpd2udqy %ymm16, %xmm19 {k1} +vcvtpd2udqy (%rax), %xmm19 {k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvtpd2udqy %ymm16, %xmm19 {z}{k1} +vcvtpd2udqy (%rax), %xmm19 {z}{k1} +vcvtpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvtpd2udqx %xmm16, %xmm19 +vcvtpd2udqx (%rax), %xmm19 +vcvtpd2udqx (%rax){1to2}, %xmm19 +vcvtpd2udqx %xmm16, %xmm19 {k1} +vcvtpd2udqx (%rax), %xmm19 {k1} +vcvtpd2udqx (%rax){1to2},%xmm19 {k1} +vcvtpd2udqx %xmm16, %xmm19 {z}{k1} +vcvtpd2udqx (%rax), %xmm19 {z}{k1} +vcvtpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2dq %xmm16, %xmm19 +vcvtps2dq (%rax), %xmm19 +vcvtps2dq (%rax){1to4}, %xmm19 +vcvtps2dq %xmm16, %xmm19 {k1} +vcvtps2dq (%rax), %xmm19 {k1} +vcvtps2dq 
(%rax){1to4},%xmm19 {k1} +vcvtps2dq %xmm16, %xmm19 {z}{k1} +vcvtps2dq (%rax), %xmm19 {z}{k1} +vcvtps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2dq %ymm16, %ymm19 +vcvtps2dq (%rax), %ymm19 +vcvtps2dq (%rax){1to8}, %ymm19 +vcvtps2dq %ymm16,%ymm19 {k1} +vcvtps2dq (%rax),%ymm19 {k1} +vcvtps2dq (%rax){1to8}, %ymm19 {k1} +vcvtps2dq %ymm16, %ymm19 {z}{k1} +vcvtps2dq (%rax), %ymm19 {z}{k1} +vcvtps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvtps2pd %xmm16, %xmm19 +vcvtps2pd (%rax), %xmm19 +vcvtps2pd (%rax){1to2}, %xmm19 +vcvtps2pd %xmm16, %xmm19 {k1} +vcvtps2pd (%rax), %xmm19 {k1} +vcvtps2pd (%rax){1to2},%xmm19 {k1} +vcvtps2pd %xmm16, %xmm19 {z}{k1} +vcvtps2pd (%rax), %xmm19 {z}{k1} +vcvtps2pd (%rax){1to2}, %xmm19 {z}{k1} + +vcvtps2pd %xmm16, %ymm19 +vcvtps2pd (%rax), %ymm19 +vcvtps2pd (%rax){1to4}, %ymm19 +vcvtps2pd %xmm16,%ymm19 {k1} +vcvtps2pd (%rax),%ymm19 {k1} +vcvtps2pd (%rax){1to4}, %ymm19 {k1} +vcvtps2pd %xmm16, %ymm19 {z}{k1} +vcvtps2pd (%rax), %ymm19 {z}{k1} +vcvtps2pd (%rax){1to4}, %ymm19 {z}{k1} + +vcvtps2udq %xmm16, %xmm19 +vcvtps2udq (%rax), %xmm19 +vcvtps2udq (%rax){1to4}, %xmm19 +vcvtps2udq %xmm16, %xmm19 {k1} +vcvtps2udq (%rax), %xmm19 {k1} +vcvtps2udq (%rax){1to4},%xmm19 {k1} +vcvtps2udq %xmm16, %xmm19 {z}{k1} +vcvtps2udq (%rax), %xmm19 {z}{k1} +vcvtps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvtps2udq %ymm16, %ymm19 +vcvtps2udq (%rax), %ymm19 +vcvtps2udq (%rax){1to8}, %ymm19 +vcvtps2udq %ymm16,%ymm19 {k1} +vcvtps2udq (%rax),%ymm19 {k1} +vcvtps2udq (%rax){1to8}, %ymm19 {k1} +vcvtps2udq %ymm16, %ymm19 {z}{k1} +vcvtps2udq (%rax), %ymm19 {z}{k1} +vcvtps2udq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2dqy %ymm16, %xmm19 +vcvttpd2dqy (%rax), %xmm19 +vcvttpd2dqy (%rax){1to4}, %xmm19 +vcvttpd2dqy %ymm16, %xmm19 {k1} +vcvttpd2dqy (%rax), %xmm19 {k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2dqy %ymm16, %xmm19 {z}{k1} +vcvttpd2dqy (%rax), %xmm19 {z}{k1} +vcvttpd2dqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2dqx %xmm16, %xmm19 +vcvttpd2dqx (%rax), %xmm19 +vcvttpd2dqx 
(%rax){1to2}, %xmm19 +vcvttpd2dqx %xmm16, %xmm19 {k1} +vcvttpd2dqx (%rax), %xmm19 {k1} +vcvttpd2dqx (%rax){1to2},%xmm19 {k1} +vcvttpd2dqx %xmm16, %xmm19 {z}{k1} +vcvttpd2dqx (%rax), %xmm19 {z}{k1} +vcvttpd2dqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2dq %xmm16, %xmm19 +vcvttps2dq (%rax), %xmm19 +vcvttps2dq (%rax){1to4}, %xmm19 +vcvttps2dq %xmm16, %xmm19 {k1} +vcvttps2dq (%rax), %xmm19 {k1} +vcvttps2dq (%rax){1to4},%xmm19 {k1} +vcvttps2dq %xmm16, %xmm19 {z}{k1} +vcvttps2dq (%rax), %xmm19 {z}{k1} +vcvttps2dq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2dq %ymm16, %ymm19 +vcvttps2dq (%rax), %ymm19 +vcvttps2dq (%rax){1to8}, %ymm19 +vcvttps2dq %ymm16,%ymm19 {k1} +vcvttps2dq (%rax),%ymm19 {k1} +vcvttps2dq (%rax){1to8}, %ymm19 {k1} +vcvttps2dq %ymm16, %ymm19 {z}{k1} +vcvttps2dq (%rax), %ymm19 {z}{k1} +vcvttps2dq (%rax){1to8}, %ymm19 {z}{k1} + +vcvttpd2udqy %ymm16, %xmm19 +vcvttpd2udqy (%rax), %xmm19 +vcvttpd2udqy (%rax){1to4}, %xmm19 +vcvttpd2udqy %ymm16, %xmm19 {k1} +vcvttpd2udqy (%rax), %xmm19 {k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {k1} +vcvttpd2udqy %ymm16, %xmm19 {z}{k1} +vcvttpd2udqy (%rax), %xmm19 {z}{k1} +vcvttpd2udqy (%rax){1to4}, %xmm19 {z}{k1} + +vcvttpd2udqx %xmm16, %xmm19 +vcvttpd2udqx (%rax), %xmm19 +vcvttpd2udqx (%rax){1to2}, %xmm19 +vcvttpd2udqx %xmm16, %xmm19 {k1} +vcvttpd2udqx (%rax), %xmm19 {k1} +vcvttpd2udqx (%rax){1to2},%xmm19 {k1} +vcvttpd2udqx %xmm16, %xmm19 {z}{k1} +vcvttpd2udqx (%rax), %xmm19 {z}{k1} +vcvttpd2udqx (%rax){1to2}, %xmm19 {z}{k1} + +vcvttps2udq %xmm16, %xmm19 +vcvttps2udq (%rax), %xmm19 +vcvttps2udq (%rax){1to4}, %xmm19 +vcvttps2udq %xmm16, %xmm19 {k1} +vcvttps2udq (%rax), %xmm19 {k1} +vcvttps2udq (%rax){1to4},%xmm19 {k1} +vcvttps2udq %xmm16, %xmm19 {z}{k1} +vcvttps2udq (%rax), %xmm19 {z}{k1} +vcvttps2udq (%rax){1to4}, %xmm19 {z}{k1} + +vcvttps2udq %ymm16, %ymm19 +vcvttps2udq (%rax), %ymm19 +vcvttps2udq (%rax){1to8}, %ymm19 +vcvttps2udq %ymm16,%ymm19 {k1} +vcvttps2udq (%rax),%ymm19 {k1} +vcvttps2udq (%rax){1to8}, %ymm19 {k1} +vcvttps2udq 
%ymm16, %ymm19 {z}{k1} +vcvttps2udq (%rax), %ymm19 {z}{k1} +vcvttps2udq (%rax){1to8}, %ymm19 {z}{k1} + vdivpd %xmm16, %xmm17, %xmm19 vdivpd (%rax), %xmm17, %xmm19 vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -1545,6 +1745,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 14 1.00 * vcvtpd2ps 
(%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: 3 9 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 3 9 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 3 9 1.00 * vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 3 9 1.00 * vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm16, %xmm19 
{%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 {%k1} +# 
CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq 
(%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 2 7 1.00 vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: 2 5 1.00 vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 8 0.50 * vcvttpd2udq (%rax){1to2}, %xmm19 
{%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: 2 10 0.50 * vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: 1 4 0.50 vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: 2 11 0.50 * vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: 1 14 4.00 vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 20 4.00 * vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 20 4.00 * vdivpd (%rax){1to2}, %xmm17, %xmm19 @@ -2622,7 +3002,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 423.00 289.33 201.33 407.17 407.17 32.00 747.33 4.00 10.67 +# CHECK-NEXT: - 423.00 364.33 276.33 467.17 467.17 32.00 825.33 4.00 10.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2782,6 +3162,186 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm16, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax){1to8}, 
%ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - 
- 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtpd2ps %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2ps (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - 
- 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 
0.33 - - - 1.33 - - vcvtps2pd %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax){1to4}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %ymm16, %ymm19 +# 
CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2udq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm19 {%k1} {z} 
+# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax){1to8}, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %ymm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqy (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %ymm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqy (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to4}, %xmm19 
{%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %ymm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqy (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqx (%rax), %xmm19 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to2}, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqx (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udqx (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2udq (%rax){1to2}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %xmm16, %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %xmm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %xmm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %xmm16, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to4}, %xmm19 {%k1} {z} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %ymm16, %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %ymm19 +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - 
vcvttps2udq %ymm16, %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %ymm19 {%k1} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2udq %ymm16, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax), %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2udq (%rax){1to8}, %ymm19 {%k1} {z} # CHECK-NEXT: - 4.00 1.00 - - - - - - - vdivpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - vdivpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - vdivpd (%rax){1to2}, %xmm17, %xmm19 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s index aad08ede8c13d..c7b8c4b78da98 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s @@ -431,7 +431,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtpd2pi %xmm0, %mm2 # CHECK-NEXT: 3 11 1.00 * cvtpd2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: 3 11 1.00 * cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: 3 9 1.00 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: 1 4 0.50 cvtps2dq %xmm0, %xmm2 @@ -689,7 +689,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 40.00 103.58 82.58 63.50 63.50 14.00 97.58 2.25 5.00 +# CHECK-NEXT: - 40.00 104.25 83.25 63.50 63.50 14.00 96.25 2.25 5.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -716,8 +716,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 
0.33 0.50 0.50 - 1.33 - - cvtpd2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2ps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2 @@ -728,8 +728,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %rcx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %ecx # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %rcx -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsd2ss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtsd2ss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - cvtsd2ss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %ecx, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2sdl (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s index 733aec155ec49..4c16bafb6377d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clflushopt.s @@ -12,7 +12,7 @@ clflushopt (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 * * U clflushopt (%rax) +# CHECK-NEXT: 1 4 0.50 * * U clflushopt (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s index 420942130645b..70502433eefc7 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-clzero.s @@ -12,7 +12,7 @@ clzero # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 U clzero +# CHECK-NEXT: 1 4 0.50 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s index 47a52fb06385a..7f5ec3104f09d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-prefetchw.s @@ -13,8 +13,8 @@ prefetchw (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.50 * * prefetch (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetchw (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetch (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetchw (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - ZnAGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s index 5616e648f4314..3bf248b044b85 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 8 0.50 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 8 0.50 * * prefetchnta (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 4 0.50 * * prefetchnta (%rax) # CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 diff --git 
a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s index 1db51b7b65147..c6bfe9a12137b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 8 0.50 * * U clflush (%rax) +# CHECK-NEXT: 1 4 0.50 * * U clflush (%rax) # CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s index b190803318a10..90e7553f092a7 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x86_64.s @@ -1396,26 +1396,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 0.25 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 0.25 movsbw %al, %di # CHECK-NEXT: 1 1 0.25 movzbw %al, %di -# CHECK-NEXT: 2 5 0.50 * movsbw (%rax), %di -# CHECK-NEXT: 2 5 0.50 * movzbw (%rax), %di +# CHECK-NEXT: 1 4 0.50 * movsbw (%rax), %di +# CHECK-NEXT: 1 4 0.50 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi -# CHECK-NEXT: 1 8 0.50 * movsbl (%rax), %edi -# CHECK-NEXT: 1 8 0.50 * movzbl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movsbl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi -# CHECK-NEXT: 2 5 0.50 * movsbq (%rax), %rdi -# CHECK-NEXT: 2 5 0.50 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi -# CHECK-NEXT: 1 8 0.50 * movswl (%rax), %edi -# CHECK-NEXT: 1 8 0.50 * movzwl 
(%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movswl (%rax), %edi +# CHECK-NEXT: 1 4 0.50 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi -# CHECK-NEXT: 2 5 0.50 * movswq (%rax), %rdi -# CHECK-NEXT: 2 5 0.50 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movswq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movslq %eax, %rdi -# CHECK-NEXT: 2 5 0.50 * movslq (%rax), %rdi +# CHECK-NEXT: 1 4 0.50 * movslq (%rax), %rdi # CHECK-NEXT: 1 4 1.00 mulb %dil # CHECK-NEXT: 2 8 1.00 * mulb (%rax) # CHECK-NEXT: 1 3 1.00 mulw %si @@ -1957,7 +1957,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 233.00 233.00 230.50 264.50 246.50 230.50 392.00 - - - - 34.00 +# CHECK-NEXT: 233.00 233.00 228.75 262.75 244.75 228.75 392.00 - - - - 34.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -2327,26 +2327,26 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movsbw %al, %di # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzbw %al, %di -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movsbw (%rax), %di -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movzbw (%rax), %di +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movsbw (%rax), %di +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzbw (%rax), %di # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movsbl %al, %edi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzbl %al, %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movsbl (%rax), %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzbl (%rax), %edi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movsbq %al, %rdi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - 
movsbq (%rax), %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movzbq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movsbq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzbq (%rax), %rdi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movswl %ax, %edi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzwl %ax, %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movswl (%rax), %edi # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzwl (%rax), %edi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movswq %ax, %rdi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movswq (%rax), %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movzwq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movswq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movzwq (%rax), %rdi # CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 0.50 0.50 0.25 0.25 0.25 0.25 - - - - - - movslq (%rax), %rdi +# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movslq (%rax), %rdi # CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 mulb %dil # CHECK-NEXT: 0.50 0.50 - 1.00 - - - - - - - 1.00 mulb (%rax) # CHECK-NEXT: - - - 1.00 - - - - - - - 1.00 mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s index 0c26a40849d62..6cfa018e6dbca 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx) # CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax) # CHECK-NEXT: 1 1 0.50 U fld %st(0) -# CHECK-NEXT: 1 8 0.50 * U flds (%edx) -# CHECK-NEXT: 1 8 0.50 * U fldl (%ecx) +# CHECK-NEXT: 1 4 0.50 * U flds (%edx) +# CHECK-NEXT: 1 4 0.50 * U fldl (%ecx) # CHECK-NEXT: 2 1 0.50 * U fldt (%eax) # CHECK-NEXT: 1 100 0.25 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.25 * U fldenv (%eax) 
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s index 461c0109254e1..671381f78a953 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clflushopt.s @@ -12,7 +12,7 @@ clflushopt (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 * * U clflushopt (%rax) +# CHECK-NEXT: 1 4 0.33 * * U clflushopt (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s index 83de8d3a691d6..12c4f757551b1 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-clzero.s @@ -12,7 +12,7 @@ clzero # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 U clzero +# CHECK-NEXT: 1 4 0.33 U clzero # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s index 2e240b3af8e83..b405f4c29e0f0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-prefetchw.s @@ -13,8 +13,8 @@ prefetchw (%rax) # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 8 0.33 * * prefetch (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetchw (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetch (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetchw (%rax) # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s index 030b6521d628e..64c3ae95ba0d4 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s @@ -282,10 +282,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovmskb %mm0, %ecx # CHECK-NEXT: 1 4 1.00 pmulhuw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmulhuw (%rax), %mm2 -# CHECK-NEXT: 1 8 0.33 * * prefetcht0 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetcht1 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetcht2 (%rax) -# CHECK-NEXT: 1 8 0.33 * * prefetchnta (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht0 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht1 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetcht2 (%rax) +# CHECK-NEXT: 1 4 0.33 * * prefetchnta (%rax) # CHECK-NEXT: 1 3 1.00 psadbw %mm0, %mm2 # CHECK-NEXT: 1 10 1.00 * psadbw (%rax), %mm2 # CHECK-NEXT: 1 1 0.50 pshufw $1, %mm0, %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s index a0535cfa0d02d..9a465802f8b17 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s @@ -415,7 +415,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.33 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.33 * andpd (%rax), %xmm2 -# CHECK-NEXT: 1 8 0.33 * * U clflush (%rax) +# CHECK-NEXT: 1 4 0.33 * * U clflush (%rax) # CHECK-NEXT: 1 1 0.50 cmpeqpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * cmpeqpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 cmpeqsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s index 7ccf6efb43e36..2bef39cec5598 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s @@ -1198,26 +1198,26 @@ xorq (%rax), %rdi # CHECK-NEXT: 1 100 0.25 U movsq (%rsi), %es:(%rdi) # CHECK-NEXT: 1 1 0.25 movsbw %al, %di # CHECK-NEXT: 1 1 0.25 movzbw %al, %di -# CHECK-NEXT: 2 5 0.33 * movsbw (%rax), %di -# CHECK-NEXT: 2 5 0.33 * movzbw (%rax), %di +# CHECK-NEXT: 1 4 0.33 * movsbw 
(%rax), %di +# CHECK-NEXT: 1 4 0.33 * movzbw (%rax), %di # CHECK-NEXT: 1 1 0.25 movsbl %al, %edi # CHECK-NEXT: 1 1 0.25 movzbl %al, %edi -# CHECK-NEXT: 1 8 0.33 * movsbl (%rax), %edi -# CHECK-NEXT: 1 8 0.33 * movzbl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movsbl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movzbl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movsbq %al, %rdi # CHECK-NEXT: 1 1 0.25 movzbq %al, %rdi -# CHECK-NEXT: 2 5 0.33 * movsbq (%rax), %rdi -# CHECK-NEXT: 2 5 0.33 * movzbq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movsbq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movzbq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movswl %ax, %edi # CHECK-NEXT: 1 1 0.25 movzwl %ax, %edi -# CHECK-NEXT: 1 8 0.33 * movswl (%rax), %edi -# CHECK-NEXT: 1 8 0.33 * movzwl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movswl (%rax), %edi +# CHECK-NEXT: 1 4 0.33 * movzwl (%rax), %edi # CHECK-NEXT: 1 1 0.25 movswq %ax, %rdi # CHECK-NEXT: 1 1 0.25 movzwq %ax, %rdi -# CHECK-NEXT: 2 5 0.33 * movswq (%rax), %rdi -# CHECK-NEXT: 2 5 0.33 * movzwq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movswq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movzwq (%rax), %rdi # CHECK-NEXT: 1 1 0.25 movslq %eax, %rdi -# CHECK-NEXT: 2 5 0.33 * movslq (%rax), %rdi +# CHECK-NEXT: 1 4 0.33 * movslq (%rax), %rdi # CHECK-NEXT: 1 4 1.00 mulb %dil # CHECK-NEXT: 2 8 1.00 * mulb (%rax) # CHECK-NEXT: 1 3 1.00 mulw %si @@ -1696,7 +1696,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 116.00 116.00 116.00 197.00 231.00 213.00 197.00 392.00 - - - - 34.00 +# CHECK-NEXT: 116.00 116.00 116.00 195.25 229.25 211.25 195.25 392.00 - - - - 34.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -1999,26 +1999,26 @@ xorq (%rax), %rdi # CHECK-NEXT: - - - - - - - - - - - - - movsq (%rsi), %es:(%rdi) # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movsbw %al, %di # CHECK-NEXT: - - - 0.25 0.25 
0.25 0.25 - - - - - - movzbw %al, %di -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movsbw (%rax), %di -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movzbw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movsbw (%rax), %di +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzbw (%rax), %di # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movsbl %al, %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzbl %al, %edi # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movsbl (%rax), %edi # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzbl (%rax), %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movsbq %al, %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movsbq (%rax), %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movzbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movsbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzbq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movswl %ax, %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzwl %ax, %edi # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movswl (%rax), %edi # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzwl (%rax), %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movswq %ax, %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movswq (%rax), %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movzwq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movswq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movzwq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - movslq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - movslq (%rax), %rdi # 
CHECK-NEXT: - - - - 1.00 - - - - - - - 1.00 mulb %dil # CHECK-NEXT: 0.33 0.33 0.33 - 1.00 - - - - - - - 1.00 mulb (%rax) # CHECK-NEXT: - - - - 1.00 - - - - - - - 1.00 mulw %si diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s index be542ecb2debc..1987176040002 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x87.s @@ -276,8 +276,8 @@ fyl2xp1 # CHECK-NEXT: 1 12 0.50 * U fisttpl (%ecx) # CHECK-NEXT: 1 12 0.50 * U fisttpll (%eax) # CHECK-NEXT: 1 1 0.50 U fld %st(0) -# CHECK-NEXT: 1 8 0.33 * U flds (%edx) -# CHECK-NEXT: 1 8 0.33 * U fldl (%ecx) +# CHECK-NEXT: 1 4 0.33 * U flds (%edx) +# CHECK-NEXT: 1 4 0.33 * U fldl (%ecx) # CHECK-NEXT: 2 1 0.50 * U fldt (%eax) # CHECK-NEXT: 1 100 0.25 * U fldcw (%eax) # CHECK-NEXT: 1 100 0.25 * U fldenv (%eax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s index 0783c5decbb88..9a201b0219784 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s @@ -1706,7 +1706,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] -# CHECK-NEXT: 176.00 176.00 176.00 1669.50 1824.50 1704.50 1467.50 - - - - - - - - 175.33 175.33 175.33 109.00 109.00 109.00 99.50 99.50 +# CHECK-NEXT: 176.00 176.00 176.00 1667.25 1822.25 1702.25 1465.25 - - - - - - - - 175.33 175.33 175.33 109.00 109.00 109.00 99.50 99.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions: @@ -2013,22 +2013,22 @@ xorq (%rax), %rdi # CHECK-NEXT: 1.00 1.00 1.00 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - 
movzbw (%rax), %di # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movsbl %al, %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzbl %al, %edi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbl (%rax), %edi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbl (%rax), %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movsbq %al, %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzbq %al, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbq (%rax), %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movsbq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzbq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movswl %ax, %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzwl %ax, %edi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswl (%rax), %edi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswl (%rax), %edi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwl (%rax), %edi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movswq 
%ax, %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movzwq %ax, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswq (%rax), %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movswq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movzwq (%rax), %rdi # CHECK-NEXT: - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - - - - - - movslq %eax, %rdi -# CHECK-NEXT: 0.33 0.33 0.33 0.25 0.25 0.25 0.25 - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movslq (%rax), %rdi +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - movslq (%rax), %rdi # CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - mulb %dil # CHECK-NEXT: 0.33 0.33 0.33 - 3.00 - - - - - - - - - - 0.33 0.33 0.33 0.33 0.33 0.33 - - mulb (%rax) # CHECK-NEXT: - - - - 3.00 - - - - - - - - - - - - - - - - - - mulw %si diff --git a/llvm/test/tools/llvm-objdump/Offloading/coff.test b/llvm/test/tools/llvm-objdump/Offloading/coff.test new file mode 100644 index 0000000000000..022277d137bd4 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/Offloading/coff.test @@ -0,0 +1,42 @@ +# RUN: yaml2obj %S/Inputs/binary.yaml -o %t.bin +# RUN: yaml2obj %s -o %t.coff +# RUN: llvm-objcopy --add-section .llvm.offloading=%t.bin %t.coff +# RUN: llvm-objdump --offloading %t.coff | FileCheck %s --match-full-lines --strict-whitespace --implicit-check-not={{.}} + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [] +sections: + - Name: .rdata + Characteristics: [] + - Name: .llvm.offloading + Characteristics: [ IMAGE_SCN_LNK_REMOVE, IMAGE_SCN_MEM_DISCARDABLE ] + Alignment: 8 +symbols: + +# CHECK:{{.*}}file format coff-x86-64 +# CHECK-EMPTY: +# CHECK-NEXT:OFFLOADING IMAGE [0]: +# 
CHECK-NEXT:kind llvm ir +# CHECK-NEXT:arch gfx908 +# CHECK-NEXT:triple amdgcn-amd-amdhsa +# CHECK-NEXT:producer openmp +# CHECK-EMPTY: +# CHECK-NEXT:OFFLOADING IMAGE [1]: +# CHECK-NEXT:kind llvm ir +# CHECK-NEXT:arch gfx90a +# CHECK-NEXT:triple amdgcn-amd-amdhsa +# CHECK-NEXT:producer openmp +# CHECK-EMPTY: +# CHECK-NEXT:OFFLOADING IMAGE [2]: +# CHECK-NEXT:kind cubin +# CHECK-NEXT:arch sm_52 +# CHECK-NEXT:triple nvptx64-nvidia-cuda +# CHECK-NEXT:producer openmp +# CHECK-EMPTY: +# CHECK-NEXT:OFFLOADING IMAGE [3]: +# CHECK-NEXT:kind +# CHECK-NEXT:arch sm_70 +# CHECK-NEXT:triple nvptx64-nvidia-cuda +# CHECK-NEXT:producer none diff --git a/llvm/test/tools/llvm-objdump/Offloading/content-failure.test b/llvm/test/tools/llvm-objdump/Offloading/content-failure.test index 5089edae04502..40ff6785f2d38 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/content-failure.test +++ b/llvm/test/tools/llvm-objdump/Offloading/content-failure.test @@ -15,4 +15,4 @@ Sections: ShOffset: 0x99999 AddressAlign: 0x0000000000000008 -# CHECK: error: '[[FILENAME]]': The end of the file was unexpectedly encountered +# CHECK: error: '[[FILENAME]]': while extracting offloading files: The end of the file was unexpectedly encountered diff --git a/llvm/test/tools/llvm-objdump/Offloading/binary.test b/llvm/test/tools/llvm-objdump/Offloading/elf.test similarity index 67% rename from llvm/test/tools/llvm-objdump/Offloading/binary.test rename to llvm/test/tools/llvm-objdump/Offloading/elf.test index 880bab2ec5337..10182aeb856cd 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/binary.test +++ b/llvm/test/tools/llvm-objdump/Offloading/elf.test @@ -3,15 +3,21 @@ # RUN: llvm-objdump --offloading %t.bin | FileCheck %s --match-full-lines --strict-whitespace --implicit-check-not={{.}} ## Check that we can dump an offloading binary inside of an ELF section. 
-# RUN: yaml2obj %s -o %t.elf -# RUN: llvm-objcopy --update-section .llvm.offloading=%t.bin %t.elf -# RUN: llvm-objdump --offloading %t.elf | FileCheck %s --check-prefixes=CHECK,ELF --match-full-lines --strict-whitespace --implicit-check-not={{.}} +# RUN: yaml2obj %s -o %t -DTYPE=ET_EXEC +# RUN: yaml2obj %s -o %t.so -DTYPE=ET_DYN +# RUN: yaml2obj %s -o %t.o -DTYPE=ET_REL +# RUN: llvm-objcopy --update-section .llvm.offloading=%t.bin %t +# RUN: llvm-objcopy --update-section .llvm.offloading=%t.bin %t.so +# RUN: llvm-objcopy --update-section .llvm.offloading=%t.bin %t.o +# RUN: llvm-objdump --offloading %t | FileCheck %s --check-prefixes=CHECK,ELF --match-full-lines --strict-whitespace --implicit-check-not={{.}} +# RUN: llvm-objdump --offloading %t.so | FileCheck %s --check-prefixes=CHECK,ELF --match-full-lines --strict-whitespace --implicit-check-not={{.}} +# RUN: llvm-objdump --offloading %t.o | FileCheck %s --check-prefixes=CHECK,ELF --match-full-lines --strict-whitespace --implicit-check-not={{.}} !ELF FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB - Type: ET_EXEC + Type: [[TYPE]] Sections: - Name: .llvm.offloading Type: SHT_LLVM_OFFLOADING diff --git a/llvm/test/tools/llvm-objdump/Offloading/non-elf.test b/llvm/test/tools/llvm-objdump/Offloading/non-elf.test index 955556f0567b5..a2eb377bb35d1 100644 --- a/llvm/test/tools/llvm-objdump/Offloading/non-elf.test +++ b/llvm/test/tools/llvm-objdump/Offloading/non-elf.test @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump --offloading %t 2>&1 | FileCheck -DFILENAME=%t %s ---- !COFF -header: - Machine: IMAGE_FILE_MACHINE_AMD64 - Characteristics: [] -sections: - - Name: .rdata - Characteristics: [] - SectionData: 00 -symbols: +--- !mach-o +FileHeader: + magic: 0xFEEDFACE + cputype: 0x00000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 0 + sizeofcmds: 0 + flags: 0x00002000 -# CHECK: warning: '[[FILENAME]]': --offloading is currently only supported for ELF targets +# CHECK: warning: 
'[[FILENAME]]': --offloading is currently only supported for COFF and ELF targets diff --git a/llvm/test/tools/llvm-objdump/Offloading/warning.test b/llvm/test/tools/llvm-objdump/Offloading/warning.test deleted file mode 100644 index a4be54ebf6dc3..0000000000000 --- a/llvm/test/tools/llvm-objdump/Offloading/warning.test +++ /dev/null @@ -1,21 +0,0 @@ -## Ensure we give a warning on bad input following good input. -# RUN: yaml2obj %S/Inputs/binary.yaml -o %t-good.bin -# RUN: yaml2obj %S/Inputs/malformed.yaml -o %t-bad.bin -# RUN: cat %t-bad.bin >> %t-good.bin -# RUN: yaml2obj %s -o %t.elf -# RUN: llvm-objcopy --update-section .llvm.offloading=%t-good.bin %t.elf -# RUN: llvm-objdump --offloading %t.elf 2>&1 | FileCheck %s -DFILENAME=%t.elf - -!ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_EXEC -Sections: - - Name: .llvm.offloading - Type: SHT_LLVM_OFFLOADING - Flags: [ SHF_EXCLUDE ] - AddressAlign: 0x0000000000000008 - -# CHECK: OFFLOADING IMAGE [0]: -# CHECK: warning: '[[FILENAME]]': while parsing offloading files: The end of the file was unexpectedly encountered diff --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test index 1d11a85b47883..c673028584c0d 100644 --- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test +++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test @@ -91,7 +91,7 @@ ; CHECK: 1: 4 ; CHECK: 2: 3 ; CHECK: 3: 1 -; CEHCK: 5: 4 fb:4 +; CHECK: 5: 4 fb:4 ; CHECK: 6: 1 fa:1 ; CHECK !CFGChecksum: 563022570642068 ; CHECK: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:6:2 diff --git a/llvm/test/tools/llvm-readobj/XCOFF/symbols-invalid.test b/llvm/test/tools/llvm-readobj/XCOFF/symbols-invalid.test index e1b1aac29b4ed..3db8803149242 100644 --- a/llvm/test/tools/llvm-readobj/XCOFF/symbols-invalid.test +++ b/llvm/test/tools/llvm-readobj/XCOFF/symbols-invalid.test @@ -1,10 +1,5 @@ ## 
Test that we report warnings or dump raw data when symbols are invalid. -# RUN: yaml2obj %s --docnum=1 -o %t1 -# RUN: llvm-readobj --syms %t1 2>&1 | FileCheck %s -DFILE=%t1 --check-prefix=CASE1 - -# CASE1: warning: '[[FILE]]': the non-function C_EXT symbol at index 1 should have only 1 auxiliary entry, i.e. the CSECT auxiliary entry - --- !XCOFF FileHeader: MagicNumber: 0x1DF @@ -14,55 +9,45 @@ Symbols: StorageClass: [[STORAGECLASS='C_EXT']] NumberOfAuxEntries: 2 -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_WEAKEXT' -o %t2 +# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_STAT' -o %t1 +# RUN: llvm-readobj --syms %t1 2>&1 | FileCheck %s -DFILE=%t1 --check-prefix=CASE1 + +# CASE1: warning: '[[FILE]]': the C_STAT symbol at index 1 should not have more than 1 auxiliary entry + +# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_DWARF' -o %t2 # RUN: llvm-readobj --syms %t2 2>&1 | FileCheck %s -DFILE=%t2 --check-prefix=CASE2 -# CASE2: warning: '[[FILE]]': the non-function C_WEAKEXT symbol at index 1 should have only 1 auxiliary entry, i.e. the CSECT auxiliary entry +# CASE2: warning: '[[FILE]]': the C_DWARF symbol at index 1 should not have more than 1 auxiliary entry -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_HIDEXT' -o %t3 +# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_BLOCK' -o %t3 # RUN: llvm-readobj --syms %t3 2>&1 | FileCheck %s -DFILE=%t3 --check-prefix=CASE3 -# CASE3: warning: '[[FILE]]': the non-function C_HIDEXT symbol at index 1 should have only 1 auxiliary entry, i.e. 
the CSECT auxiliary entry +# CASE3: warning: '[[FILE]]': the C_BLOCK symbol at index 1 should not have more than 1 auxiliary entry -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_STAT' -o %t4 +# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_FCN' -o %t4 # RUN: llvm-readobj --syms %t4 2>&1 | FileCheck %s -DFILE=%t4 --check-prefix=CASE4 -# CASE4: warning: '[[FILE]]': the C_STAT symbol at index 1 should not have more than 1 auxiliary entry - -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_DWARF' -o %t5 -# RUN: llvm-readobj --syms %t5 2>&1 | FileCheck %s -DFILE=%t5 --check-prefix=CASE5 - -# CASE5: warning: '[[FILE]]': the C_DWARF symbol at index 1 should not have more than 1 auxiliary entry - -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_BLOCK' -o %t6 -# RUN: llvm-readobj --syms %t6 2>&1 | FileCheck %s -DFILE=%t6 --check-prefix=CASE6 - -# CASE6: warning: '[[FILE]]': the C_BLOCK symbol at index 1 should not have more than 1 auxiliary entry - -# RUN: yaml2obj %s --docnum=1 -DSTORAGECLASS='C_FCN' -o %t7 -# RUN: llvm-readobj --syms %t7 2>&1 | FileCheck %s -DFILE=%t7 --check-prefix=CASE7 - -# CASE7: warning: '[[FILE]]': the C_FCN symbol at index 1 should not have more than 1 auxiliary entry +# CASE4: warning: '[[FILE]]': the C_FCN symbol at index 1 should not have more than 1 auxiliary entry ## This case tests the raw data output ability when a file auxiliary entry does ## not have the matching auxiliary type. 
-# RUN: yaml2obj %s --docnum=2 -o %t8 -# RUN: llvm-readobj --syms %t8 | FileCheck %s --strict-whitespace --match-full-lines --check-prefix=CASE8 - -# CASE8:Symbols [ -# CASE8-NEXT: Symbol { -# CASE8-NEXT: Index: 0 -# CASE8-NEXT: Name: .fun -# CASE8-NEXT: Value (SymbolTableIndex): 0x0 -# CASE8-NEXT: Section: N_UNDEF -# CASE8-NEXT: Source Language ID: TB_C (0x0) -# CASE8-NEXT: CPU Version ID: 0x0 -# CASE8-NEXT: StorageClass: C_FILE (0x67) -# CASE8-NEXT: NumberOfAuxEntries: 1 -# CASE8-NEXT: !Unexpected raw auxiliary entry data: -# CASE8-NEXT: 00000000 00000001 00020300 00000000 00fb -# CASE8-NEXT: } -# CASE8-NEXT:] +# RUN: yaml2obj %s --docnum=2 -o %t5 +# RUN: llvm-readobj --syms %t5 | FileCheck %s --strict-whitespace --match-full-lines --check-prefix=CASE5 + +# CASE5:Symbols [ +# CASE5-NEXT: Symbol { +# CASE5-NEXT: Index: 0 +# CASE5-NEXT: Name: .fun +# CASE5-NEXT: Value (SymbolTableIndex): 0x0 +# CASE5-NEXT: Section: N_UNDEF +# CASE5-NEXT: Source Language ID: TB_C (0x0) +# CASE5-NEXT: CPU Version ID: 0x0 +# CASE5-NEXT: StorageClass: C_FILE (0x67) +# CASE5-NEXT: NumberOfAuxEntries: 1 +# CASE5-NEXT: !Unexpected raw auxiliary entry data: +# CASE5-NEXT: 00000000 00000001 00020300 00000000 00fb +# CASE5-NEXT: } +# CASE5-NEXT:] --- !XCOFF FileHeader: diff --git a/llvm/test/tools/llvm-readobj/XCOFF/symbols.test b/llvm/test/tools/llvm-readobj/XCOFF/symbols.test index f72144c6f1a06..72ec8967cc957 100644 --- a/llvm/test/tools/llvm-readobj/XCOFF/symbols.test +++ b/llvm/test/tools/llvm-readobj/XCOFF/symbols.test @@ -134,6 +134,33 @@ Symbols: LineNumHi: 2 LineNumLo: 3 +##The C_WEAKEXT symbol with two Function auxiliary entries and a CSECT auxiliary entry. 
+ - Name: .fun7 + Value: 0x0 + Section: .text + Type: 0x00 + StorageClass: C_WEAKEXT + NumberOfAuxEntries: 3 + AuxEntries: + - Type: AUX_FCN + OffsetToExceptionTbl: 2 + SizeOfFunction: 3 + SymIdxOfNextBeyond: 4 + PtrToLineNum: 5 + - Type: AUX_FCN + OffsetToExceptionTbl: 8 + SizeOfFunction: 5 + SymIdxOfNextBeyond: 8 + PtrToLineNum: 5 + - Type: AUX_CSECT + ParameterHashIndex: 11 + TypeChkSectNum: 22 + SymbolAlignmentAndType: 33 + StorageMappingClass: XMC_PR + SectionOrLength: 256 + StabInfoIndex: 44 + StabSectNum: 55 + # SYMBOL32: Symbols [ # SYMBOL32-NEXT: Symbol { # SYMBOL32-NEXT: Index: 0 @@ -304,4 +331,38 @@ Symbols: # SYMBOL32-NEXT: LineNumber (Low 2 Bytes): 0x3 # SYMBOL32-NEXT: } # SYMBOL32-NEXT: } +# SYMBOL32-NEXT: Symbol { +# SYMBOL32-NEXT: Index: 21 +# SYMBOL32-NEXT: Name: .fun7 +# SYMBOL32-NEXT: Value (RelocatableAddress): 0x0 +# SYMBOL32-NEXT: Section: .text +# SYMBOL32-NEXT: Type: 0x0 +# SYMBOL32-NEXT: StorageClass: C_WEAKEXT (0x6F) +# SYMBOL32-NEXT: NumberOfAuxEntries: 3 +# SYMBOL32-NEXT: Function Auxiliary Entry { +# SYMBOL32-NEXT: Index: 22 +# SYMBOL32-NEXT: OffsetToExceptionTable: 0x2 +# SYMBOL32-NEXT: SizeOfFunction: 0x3 +# SYMBOL32-NEXT: PointerToLineNum: 0x5 +# SYMBOL32-NEXT: SymbolIndexOfNextBeyond: 4 +# SYMBOL32-NEXT: } +# SYMBOL32-NEXT: Function Auxiliary Entry { +# SYMBOL32-NEXT: Index: 23 +# SYMBOL32-NEXT: OffsetToExceptionTable: 0x8 +# SYMBOL32-NEXT: SizeOfFunction: 0x5 +# SYMBOL32-NEXT: PointerToLineNum: 0x5 +# SYMBOL32-NEXT: SymbolIndexOfNextBeyond: 8 +# SYMBOL32-NEXT: } +# SYMBOL32-NEXT: CSECT Auxiliary Entry { +# SYMBOL32-NEXT: Index: 24 +# SYMBOL32-NEXT: SectionLen: 256 +# SYMBOL32-NEXT: ParameterHashIndex: 0xB +# SYMBOL32-NEXT: TypeChkSectNum: 0x16 +# SYMBOL32-NEXT: SymbolAlignmentLog2: 4 +# SYMBOL32-NEXT: SymbolType: XTY_SD (0x1) +# SYMBOL32-NEXT: StorageMappingClass: XMC_PR (0x0) +# SYMBOL32-NEXT: StabInfoIndex: 0x2C +# SYMBOL32-NEXT: StabSectNum: 0x37 +# SYMBOL32-NEXT: } +# SYMBOL32-NEXT: } # SYMBOL32-NEXT: ] diff --git 
a/llvm/test/tools/llvm-reduce/file-output-type.test b/llvm/test/tools/llvm-reduce/file-output-type.test index 5535866e7251d..bbfbeb2fc158a 100644 --- a/llvm/test/tools/llvm-reduce/file-output-type.test +++ b/llvm/test/tools/llvm-reduce/file-output-type.test @@ -1,3 +1,4 @@ +# REQUIRES: x86-registered-target # RUN: rm -f reduced.ll reduced.bc # RUN: llvm-as -o test-output-format.bc %p/Inputs/test-output-format.ll @@ -40,7 +41,7 @@ # Make sure MIR ignores requests for bitcode -# RUN: llvm-reduce -output-bitcode --delta-passes=instructions -o %t.3 --test FileCheck --test-arg --check-prefix=MIR --test-arg %s --test-arg --input-file %p/Inputs/test-output-format.mir +# RUN: llvm-reduce -mtriple=x86_64-- -output-bitcode --delta-passes=instructions -o %t.3 --test FileCheck --test-arg --check-prefix=MIR --test-arg %s --test-arg --input-file %p/Inputs/test-output-format.mir # RUN: llc -x mir -run-pass=none -o /dev/null %t.3 diff --git a/llvm/test/tools/llvm-reduce/reduce-opcodes-call-typed-pointers.ll b/llvm/test/tools/llvm-reduce/reduce-opcodes-call-typed-pointers.ll new file mode 100644 index 0000000000000..f944b1d763b02 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-opcodes-call-typed-pointers.ll @@ -0,0 +1,87 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=opcodes --test FileCheck --test-arg --check-prefix=ALL --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=RESULT,ALL %s < %t + +target datalayout = "A5" + +; ALL-LABEL: @call_void_no_args( +; RESULT-NEXT: store volatile i32 0, i32 addrspace(5)* null, align 4 +; RESULT-NEXT: ret void +define void @call_void_no_args() { + call void @void_no_args() + ret void +} + +; ALL-LABEL: @call_load_like_i32( +; RESULT-NEXT: %op = load volatile i32, i32 addrspace(1)* %ptr, align 4 +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_i32(i32 addrspace(1)* %ptr) { + %op = call i32 @load_like_i32(i32 addrspace(1)* %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_load_like_ptr_ptr( 
+; RESULT-NEXT: %op = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(3)* %ptr, align 8 +; RESULT-NEXT: ret i32 addrspace(1)* %op +define i32 addrspace(1)* @call_load_like_ptr_ptr(i32 addrspace(1)* addrspace(3)* %ptr) { + %op = call i32 addrspace(1)* @load_like_ptr_ptr(i32 addrspace(1)* addrspace(3)* %ptr) + ret i32 addrspace(1)* %op +} + +; ALL-LABEL: @call_store_like_i16( +; RESULT-NEXT: store volatile i16 %val, i16 addrspace(1)* %ptr, align 2 +; RESULT-NEXT: ret void +define void @call_store_like_i16(i16 %val, i16 addrspace(1)* %ptr) { + call void @store_like_i16(i16 %val, i16 addrspace(1)* %ptr) + ret void +} + +; ALL-LABEL: @call_load_like_ptr_mismatch( +; RESULT-NEXT: %op = call i32 @load_like_ptr_mismatch(i16 addrspace(1)* %ptr) +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_ptr_mismatch(i16 addrspace(1)* %ptr) { + %op = call i32 @load_like_ptr_mismatch(i16 addrspace(1)* %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_store_like_ptr_store( +; RESULT-NEXT: call +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store(i32 addrspace(3)* %ptr.val, i32 addrspace(1)* %ptr) { + call void @store_like_ptr_store(i32 addrspace(3)* %ptr.val, i32 addrspace(1)* %ptr) + ret void +} + + +; ALL-LABEL: @call_store_like_ptr_store_swap( +; RESULT-NEXT: call +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_swap(i32 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) { + call void @store_like_ptr_store_swap(i32 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_different_element_type( +; RESULT-NEXT: call +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_different_element_type(i32 addrspace(3)* %ptr.val, i16 addrspace(1)* %ptr) { + call void @store_like_ptr_store_different_element_type(i32 addrspace(3)* %ptr.val, i16 addrspace(1)* %ptr) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_different_element_type_swap( +; RESULT-NEXT: call +; RESULT-NEXT: ret void +define 
void @call_store_like_ptr_store_different_element_type_swap(i16 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) { + call void @store_like_ptr_store_different_element_type_swap(i16 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) + ret void +} + +declare void @void_no_args() +declare i32 addrspace(1)* @load_like_ptr_ptr(i32 addrspace(1)* addrspace(3)*) +declare i32 @load_like_i32(i32 addrspace(1)*) +declare void @store_like_i16(i16 %val, i16 addrspace(1)* %ptr) +declare i32 @load_like_ptr_mismatch(i16 addrspace(1)*) +declare void @store_like_ptr_store(i32 addrspace(3)* %ptr.val, i32 addrspace(1)* %ptr) +declare void @store_like_ptr_store_swap(i32 addrspace(1)* %ptr, i32 addrspace(3)* %ptr.val) +declare void @store_like_ptr_store_different_element_type(i32 addrspace(3)* %ptr.val, i16 addrspace(1)* %ptr) +declare void @store_like_ptr_store_different_element_type_swap(i16 addrspace(1)*, i32 addrspace(3)*) diff --git a/llvm/test/tools/llvm-reduce/reduce-opcodes-call.ll b/llvm/test/tools/llvm-reduce/reduce-opcodes-call.ll new file mode 100644 index 0000000000000..c63ba522abef8 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-opcodes-call.ll @@ -0,0 +1,335 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=opcodes --test FileCheck --test-arg --check-prefix=ALL --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck -check-prefixes=RESULT,ALL %s < %t + +target datalayout = "A5" + +declare token @llvm.return.token() +declare void @llvm.uses.token(token) + +; ALL-LABEL: @call_token( +; RESULT-NEXT: %token = call token @llvm.return.token() +; RESULT-NEXT: call void @llvm.uses.token(token %token) +; RESULT-NEXT: ret void +define void @call_token() { + %token = call token @llvm.return.token() + call void @llvm.uses.token(token %token) + ret void +} + +; ALL-LABEL: @call_void_0_size_arg( +; RESULT-NEXT: store volatile {} %arg, ptr addrspace(5) null, align 1 +; RESULT-NEXT: ret void +define void @call_void_0_size_arg({} %arg) { + call void 
@void_0_size_arg({} %arg) + ret void +} + +; ALL-LABEL: @call_return_0_size( +; RESULT-NEXT: %op = load volatile {}, ptr %ptr, align 1 +; RESULT-NEXT: ret {} %op +define {} @call_return_0_size(ptr %ptr) { + %op = call {} @return_0_size(ptr %ptr) + ret {} %op +} + +; ALL-LABEL: define void @call_void_no_args( +; RESULT-NEXT: store volatile i32 0, ptr addrspace(5) null, align 4 +; RESULT-NEXT: ret void +define void @call_void_no_args() { + call void @void_no_args() + ret void +} + +; ALL-LABEL: @call_store_like_i16( +; RESULT-NEXT: store volatile i16 %val, ptr addrspace(1) %ptr, align 2 +; RESULT-NEXT: ret void +define void @call_store_like_i16(i16 %val, ptr addrspace(1) %ptr) { + call void @store_like_i16(i16 %val, ptr addrspace(1) %ptr) + ret void +} + +; ALL-LABEL: @keep_call_store_like_i16( +; ALL-NEXT: call void @store_like_i16(i16 %val, ptr addrspace(1) %ptr) +; ALL-NEXT: ret void +define void @keep_call_store_like_i16(i16 %val, ptr addrspace(1) %ptr) { + call void @store_like_i16(i16 %val, ptr addrspace(1) %ptr) + ret void +} + +; ALL-LABEL: @call_store_like_i16_swap( +; RESULT-NEXT: store volatile i16 %val, ptr addrspace(1) %ptr +; RESULT-NEXT: ret void +define void @call_store_like_i16_swap(ptr addrspace(1) %ptr, i16 %val) { + call void @store_like_i16_swap(ptr addrspace(1) %ptr, i16 %val) + ret void +} + +; ALL-LABEL: @call_store_like_i16_extra_arg( +; RESULT-NEXT: call void @store_like_i16_extra_arg(i16 %val, ptr addrspace(1) %ptr, i32 %extra) +; RESULT-NEXT: ret void +define void @call_store_like_i16_extra_arg(i16 %val, ptr addrspace(1) %ptr, i32 %extra) { + call void @store_like_i16_extra_arg(i16 %val, ptr addrspace(1) %ptr, i32 %extra) + ret void +} + +; ALL-LABEL: @call_store_like_i16_extra_ptr_arg( +; RESULT-NEXT: call void @store_like_i16_extra_ptr_arg(i16 %val, ptr addrspace(1) %ptr, ptr addrspace(1) %extra) +; RESULT-NEXT: ret void +define void @call_store_like_i16_extra_ptr_arg(i16 %val, ptr addrspace(1) %ptr, ptr addrspace(1) %extra) { + call 
void @store_like_i16_extra_ptr_arg(i16 %val, ptr addrspace(1) %ptr, ptr addrspace(1) %extra) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store( +; RESULT-NEXT: store volatile ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val, align 8 +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store(ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr) { + call void @store_like_ptr_store(ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_swap( +; RESULT-NEXT: store volatile ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr, align 8 +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_swap(ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val) { + call void @store_like_ptr_store_swap(ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_different_element_type( +; RESULT-NEXT: store volatile ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val, align 8 +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_different_element_type(ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr) { + call void @store_like_ptr_store_different_element_type(ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr) + ret void +} + +; ALL-LABEL: @call_store_like_ptr_store_different_element_type_swap( +; RESULT-NEXT: store volatile ptr addrspace(3) %ptr.val, ptr addrspace(1) %ptr, align 8 +; RESULT-NEXT: ret void +define void @call_store_like_ptr_store_different_element_type_swap(ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val) { + call void @store_like_ptr_store_different_element_type_swap(ptr addrspace(1) %ptr, ptr addrspace(3) %ptr.val) + ret void +} + +; ALL-LABEL: @call_load_like_i32( +; RESULT-NEXT: %op = load volatile i32, ptr addrspace(1) %ptr, align 4 +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_i32(ptr addrspace(1) %ptr) { + %op = call i32 @load_like_i32(ptr addrspace(1) %ptr) + ret i32 %op +} + +; ALL-LABEL: @keep_call_load_like_i32( +; ALL-NEXT: %op = call 
i32 @load_like_i32(ptr addrspace(1) %ptr) +; ALL-NEXT: ret i32 %op +define i32 @keep_call_load_like_i32(ptr addrspace(1) %ptr) { + %op = call i32 @load_like_i32(ptr addrspace(1) %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_load_like_i32_extra_arg( +; RESULT-NEXT: %op = call i32 @load_like_i32_extra_arg(ptr addrspace(1) %ptr, i32 %extra) +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_i32_extra_arg(ptr addrspace(1) %ptr, i32 %extra) { + %op = call i32 @load_like_i32_extra_arg(ptr addrspace(1) %ptr, i32 %extra) + ret i32 %op +} + +; ALL-LABEL: @call_load_like_ptr_mismatch( +; RESULT-NEXT: %op = load volatile i32, ptr addrspace(1) %ptr, align 4 +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_ptr_mismatch(ptr addrspace(1) %ptr) { + %op = call i32 @load_like_ptr_mismatch(ptr addrspace(1) %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_load_like_skip_arg( +; RESULT-NEXT: %op = load volatile i32, ptr addrspace(1) %ptr, align 4 +; RESULT-NEXT: ret i32 %op +define i32 @call_load_like_skip_arg(float, ptr addrspace(1) %ptr) { + %op = call i32 @load_like_skip_arg(float poison, ptr addrspace(1) %ptr) + ret i32 %op +} + +; ALL-LABEL: @call_fp_scalar_noargs( +; RESULT-NEXT: %op = load volatile float, ptr addrspace(5) null, align 4 +; RESULT-NEXT: ret float %op +define float @call_fp_scalar_noargs() { + %op = call nsz float @fp_scalar_noargs() + ret float %op +} + +; ALL-LABEL: @call_fp_vector_noargs( +; RESULT-NEXT: %op = load volatile <2 x half>, ptr addrspace(5) null, align 4 +; RESULT-NEXT: ret <2 x half> %op +define <2 x half> @call_fp_vector_noargs() { + %op = call nsz <2 x half> @fp_vector_noargs() + ret <2 x half> %op +} + +; ALL-LABEL: @call_unary_fp_scalar( +; RESULT-NEXT: %op = fneg nsz float %a +; RESULT-NEXT: ret float %op +define float @call_unary_fp_scalar(float %a) { + %op = call nsz float @unary_fp_scalar(float %a) + ret float %op +} + +; ALL-LABEL: @call_unary_fp_vector( +; RESULT-NEXT: %op = fneg nsz <2 x half> %a +; RESULT-NEXT: ret <2 x half> %op 
+define <2 x half> @call_unary_fp_vector(<2 x half> %a) { + %op = call nsz <2 x half> @unary_fp_vector(<2 x half> %a) + ret <2 x half> %op +} + +; ALL-LABEL: @ignore_undef_args_unary_fp( +; RESULT-NEXT: %op = fneg nnan float %a +; RESULT-NEXT: ret float %op +define float @ignore_undef_args_unary_fp(float %a) { + %op = call nnan float @func_i32_f32_i32(i32 poison, float %a, i32 poison) + ret float %op +} + +; ALL-LABEL: @call_binary_fp_scalar( +; RESULT: %op = fmul afn float %a, %b +; RESULT-NEXT: ret float %op +define float @call_binary_fp_scalar(float %a, float %b) { + %op = call afn float @binary_fp_scalar(float %a, float %b) + ret float %op +} + +; ALL-LABEL: @call_binary_fp_vector( +; RESULT-NEXT: %op = fmul afn <2 x half> %a, %b +; RESULT-NEXT: ret <2 x half> %op +define <2 x half> @call_binary_fp_vector(<2 x half> %a, <2 x half> %b) { + %op = call afn <2 x half> @binary_fp_vector(<2 x half> %a, <2 x half> %b) + ret <2 x half> %op +} + +; ALL-LABEL: @call_ternary_fp_scalar( +; RESULT-NEXT: %op = call afn float @llvm.fma.f32(float %a, float %b, float %c) +; RESULT-NEXT: ret float %op +define float @call_ternary_fp_scalar(float %a, float %b, float %c) { + %op = call afn float @ternary_fp_scalar(float %a, float %b, float %c) + ret float %op +} + +; ALL-LABEL: @call_ternary_fp_vector( +; RESULT-NEXT: %op = call afn <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) +; RESULT-NEXT: ret <2 x half> %op +define <2 x half> @call_ternary_fp_vector(<2 x half> %a, <2 x half> %b, <2 x half> %c) { + %op = call afn <2 x half> @ternary_fp_vector(<2 x half> %a, <2 x half> %b, <2 x half> %c) + ret <2 x half> %op +} + +; ALL-LABEL: @call_unary_int_scalar( +; RESULT-NEXT: %op = call i32 @llvm.bswap.i32(i32 %a) +; RESULT-NEXT: ret i32 %op +define i32 @call_unary_int_scalar(i32 %a) { + %op = call i32 @unary_int_scalar(i32 %a) + ret i32 %op +} + +; ALL-LABEL: @call_unary_int_vector( +; RESULT-NEXT: %op = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a) +; 
RESULT-NEXT: ret <2 x i16> %op +define <2 x i16> @call_unary_int_vector(<2 x i16> %a) { + %op = call <2 x i16> @unary_int_vector(<2 x i16> %a) + ret <2 x i16> %op +} + +; ALL-LABEL: @call_binary_int_scalar( +; RESULT-NEXT: %op = and i32 %a, %b +; RESULT-NEXT: ret i32 %op +define i32 @call_binary_int_scalar(i32 %a, i32 %b) { + %op = call i32 @binary_int_scalar(i32 %a, i32 %b) + ret i32 %op +} + +; ALL-LABEL: @call_binary_int_vector( +; RESULT-NEXT: %op = and <2 x i16> %a, %b +; RESULT-NEXT: ret <2 x i16> %op +define <2 x i16> @call_binary_int_vector(<2 x i16> %a, <2 x i16> %b) { + %op = call <2 x i16> @binary_int_vector(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %op +} + +; ALL-LABEL: @call_ternary_int_scalar( +; RESULT-NEXT: %op = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; RESULT-NEXT: ret i32 %op +define i32 @call_ternary_int_scalar(i32 %a, i32 %b, i32 %c) { + %op = call i32 @ternary_int_scalar(i32 %a, i32 %b, i32 %c) + ret i32 %op +} + +; ALL-LABEL: @call_ternary_int_vector( +; RESULT-NEXT: %op = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) +; RESULT-NEXT: ret <2 x i16> %op +define <2 x i16> @call_ternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) { + %op = call <2 x i16> @ternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) + ret <2 x i16> %op +} + +; ALL-LABEL: @call_quaternary_int_scalar( +; RESULT-NEXT: %op = call i32 @quaternary_int_scalar(i32 %a, i32 %b, i32 %c, i32 %d) +; RESULT-NEXT: ret i32 %op +define i32 @call_quaternary_int_scalar(i32 %a, i32 %b, i32 %c, i32 %d) { + %op = call i32 @quaternary_int_scalar(i32 %a, i32 %b, i32 %c, i32 %d) + ret i32 %op +} + +; ALL-LABEL: @call_quaternary_int_vector( +; RESULT-NEXT: %op = call <2 x i16> @quaternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) +; RESULT-NEXT: ret <2 x i16> %op +define <2 x i16> @call_quaternary_int_vector(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) { + %op = call <2 x i16> @quaternary_int_vector(<2 x 
i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d) + ret <2 x i16> %op +} + +declare void @void_0_size_arg({}) +declare {} @return_0_size(ptr) +declare void @void_no_args() +declare void @store_like_i16(i16, ptr addrspace(1)) +declare void @store_like_i16_swap(ptr addrspace(1), i16) +declare void @store_like_i16_extra_arg(i16, ptr addrspace(1), i32) +declare void @store_like_i16_extra_ptr_arg(i16, ptr addrspace(1), ptr addrspace(1)) +declare void @store_like_ptr_store(ptr addrspace(3), ptr addrspace(1)) +declare void @store_like_ptr_store_swap(ptr addrspace(1), ptr addrspace(3)) +declare void @store_like_ptr_store_different_element_type(ptr addrspace(3), ptr addrspace(1)) +declare void @store_like_ptr_store_different_element_type_swap(ptr addrspace(1), ptr addrspace(3)) +declare i32 @load_like_i32(ptr addrspace(1)) + +declare i32 @load_like_i32_extra_arg(ptr addrspace(1), i32) + +declare i32 @load_like_ptr_mismatch(ptr addrspace(1)) +declare i32 @load_like_skip_arg(float, ptr addrspace(1)) + +declare float @fp_scalar_noargs() +declare i32 @int_scalar_noargs() + +declare <2 x half> @fp_vector_noargs() +declare <2 x i16> @int_vector_noargs() + +declare float @unary_fp_scalar(float) +declare <2 x half> @unary_fp_vector(<2 x half>) +declare float @func_i32_f32_i32(i32, float, i32) + +declare float @binary_fp_scalar(float, float) +declare <2 x half> @binary_fp_vector(<2 x half>, <2 x half>) + +declare float @ternary_fp_scalar(float, float, float) +declare <2 x half> @ternary_fp_vector(<2 x half>, <2 x half>, <2 x half>) + +declare float @quaternary_fp_scalar(float, float, float, float) +declare <2 x half> @quaternary_fp_vector(<2 x half>, <2 x half>, <2 x half>, <2 x half>) + +declare i32 @unary_int_scalar(i32) +declare <2 x i16> @unary_int_vector(<2 x i16>) +declare i32 @binary_int_scalar(i32, i32) +declare <2 x i16> @binary_int_vector(<2 x i16>, <2 x i16>) +declare i32 @ternary_int_scalar(i32, i32, i32) +declare <2 x i16> @ternary_int_vector(<2 x i16>, <2 x i16>, <2 x 
i16>) +declare i32 @quaternary_int_scalar(i32, i32, i32, i32) +declare <2 x i16> @quaternary_int_vector(<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>) diff --git a/llvm/test/tools/llvm-reduce/remove-attributes-from-intrinsics.ll b/llvm/test/tools/llvm-reduce/remove-attributes-from-intrinsics.ll index 3eb43c344ea8e..2aec90b086f7f 100644 --- a/llvm/test/tools/llvm-reduce/remove-attributes-from-intrinsics.ll +++ b/llvm/test/tools/llvm-reduce/remove-attributes-from-intrinsics.ll @@ -26,7 +26,7 @@ define i32 @t(i32 %a) { ; CHECK-ALL: declare i32 @llvm.uadd.sat.i32(i32, i32) #0 declare i32 @llvm.uadd.sat.i32(i32, i32) #0 -; CHECK-ALL: attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK-ALL: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK-INTERESTINGNESS: attributes #1 = { ; CHECK-INTERESTINGNESS-SAME: "arg4" diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt index a255c1c5daf51..38028cd3d80a3 100644 --- a/llvm/tools/dsymutil/CMakeLists.txt +++ b/llvm/tools/dsymutil/CMakeLists.txt @@ -40,6 +40,4 @@ if(APPLE) target_link_libraries(dsymutil PRIVATE "-framework CoreFoundation") endif(APPLE) -if(HAVE_CXX_ATOMICS_WITH_LIB OR HAVE_CXX_ATOMICS64_WITH_LIB) - target_link_libraries(dsymutil PRIVATE atomic) -endif() +target_link_libraries(dsymutil PRIVATE ${LLVM_ATOMIC_LIB}) diff --git a/llvm/tools/llvm-config/llvm-config.cpp b/llvm/tools/llvm-config/llvm-config.cpp index 8b28a00b26236..b1d795a0a3491 100644 --- a/llvm/tools/llvm-config/llvm-config.cpp +++ b/llvm/tools/llvm-config/llvm-config.cpp @@ -234,7 +234,6 @@ Options:\n\ --obj-root Print the object root used to build LLVM.\n\ --prefix Print the installation prefix.\n\ --shared-mode Print how the provided components can be collectively linked (`shared` or `static`).\n\ - --src-root Print the source root LLVM was built from.\n\ --system-libs System Libraries needed to link against LLVM components.\n\ 
--targets-built List of all targets currently built.\n\ --version Print LLVM version.\n\ @@ -592,8 +591,6 @@ int main(int argc, char **argv) { PrintSharedMode = true; } else if (Arg == "--obj-root") { OS << ActivePrefix << '\n'; - } else if (Arg == "--src-root") { - OS << LLVM_SRC_ROOT << '\n'; } else if (Arg == "--ignore-libllvm") { LinkDyLib = false; LinkMode = BuiltSharedLibs ? LinkModeShared : LinkModeAuto; diff --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp index 46334c249070d..4ac6b99e79bbb 100644 --- a/llvm/tools/llvm-objdump/OffloadDump.cpp +++ b/llvm/tools/llvm-objdump/OffloadDump.cpp @@ -10,6 +10,7 @@ /// This file implements the offloading-specific dumper for llvm-objdump. /// //===----------------------------------------------------------------------===// + #include "OffloadDump.h" #include "llvm-objdump.h" #include "llvm/Object/ELFObjectFile.h" @@ -46,67 +47,34 @@ static void printBinary(const OffloadBinary &OB, uint64_t Index) { << getOffloadKindName(OB.getOffloadKind()) << "\n"; } -static Error visitAllBinaries(const OffloadBinary &OB) { - uint64_t Offset = 0; - uint64_t Index = 0; - while (Offset < OB.getMemoryBufferRef().getBufferSize()) { - MemoryBufferRef Buffer = - MemoryBufferRef(OB.getData().drop_front(Offset), OB.getFileName()); - auto BinaryOrErr = OffloadBinary::create(Buffer); - if (!BinaryOrErr) - return BinaryOrErr.takeError(); - - OffloadBinary &Binary = **BinaryOrErr; - printBinary(Binary, Index++); - - Offset += Binary.getSize(); - } - return Error::success(); -} - /// Print the embedded offloading contents of an ObjectFile \p O. 
void llvm::dumpOffloadBinary(const ObjectFile &O) { - if (!O.isELF()) { - reportWarning("--offloading is currently only supported for ELF targets", - O.getFileName()); + if (!O.isELF() && !O.isCOFF()) { + reportWarning( + "--offloading is currently only supported for COFF and ELF targets", + O.getFileName()); return; } - for (ELFSectionRef Sec : O.sections()) { - if (Sec.getType() != ELF::SHT_LLVM_OFFLOADING) - continue; - - Expected Contents = Sec.getContents(); - if (!Contents) - reportError(Contents.takeError(), O.getFileName()); - - std::unique_ptr Buffer = - MemoryBuffer::getMemBuffer(*Contents, O.getFileName(), false); - if (!isAddrAligned(Align(OffloadBinary::getAlignment()), - Buffer->getBufferStart())) - Buffer = MemoryBuffer::getMemBufferCopy(Buffer->getBuffer(), - Buffer->getBufferIdentifier()); - auto BinaryOrErr = OffloadBinary::create(*Buffer); - if (!BinaryOrErr) - reportError(O.getFileName(), "while extracting offloading files: " + - toString(BinaryOrErr.takeError())); - OffloadBinary &Binary = **BinaryOrErr; + SmallVector Binaries; + if (Error Err = extractOffloadBinaries(O.getMemoryBufferRef(), Binaries)) + reportError(O.getFileName(), "while extracting offloading files: " + + toString(std::move(Err))); - // Print out all the binaries that are contained in this buffer. If we fail - // to parse a binary before reaching the end of the buffer emit a warning. - if (Error Err = visitAllBinaries(Binary)) - reportWarning("while parsing offloading files: " + - toString(std::move(Err)), - O.getFileName()); - } + // Print out all the binaries that are contained in this buffer. + for (uint64_t I = 0, E = Binaries.size(); I != E; ++I) + printBinary(*Binaries[I].getBinary(), I); } /// Print the contents of an offload binary file \p OB. This may contain /// multiple binaries stored in the same buffer. void llvm::dumpOffloadSections(const OffloadBinary &OB) { - // Print out all the binaries that are contained at this buffer. 
If we fail to - // parse a binary before reaching the end of the buffer emit a warning. - if (Error Err = visitAllBinaries(OB)) - reportWarning("while parsing offloading files: " + toString(std::move(Err)), - OB.getFileName()); + SmallVector Binaries; + if (Error Err = extractOffloadBinaries(OB.getMemoryBufferRef(), Binaries)) + reportError(OB.getFileName(), "while extracting offloading files: " + + toString(std::move(Err))); + + // Print out all the binaries that are contained in this buffer. + for (uint64_t I = 0, E = Binaries.size(); I != E; ++I) + printBinary(*Binaries[I].getBinary(), I); } diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp index a2ca5b86f35e9..9e52f86a08fc9 100644 --- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp +++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp @@ -616,22 +616,13 @@ void XCOFFDumper::printSymbol(const SymbolRef &S) { case XCOFF::C_EXT: case XCOFF::C_WEAKEXT: case XCOFF::C_HIDEXT: { - if (!SymbolEntRef.isFunction() && NumberOfAuxEntries > 1) - reportUniqueWarning("the non-function " + - enumToString(static_cast(SymbolClass), - makeArrayRef(SymStorageClass)) + - " symbol at index " + Twine(SymbolIdx) + - " should have only 1 auxiliary entry, i.e. the CSECT " - "auxiliary entry"); - // For 32-bit objects, print the function auxiliary symbol table entry. The // last one must be a CSECT auxiliary entry. // For 64-bit objects, both a function auxiliary entry and an exception // auxiliary entry may appear, print them in the loop and skip printing the // CSECT auxiliary entry, which will be printed outside the loop. 
for (int I = 1; I <= NumberOfAuxEntries; I++) { - if ((I == NumberOfAuxEntries && !Obj.is64Bit()) || - !SymbolEntRef.isFunction()) + if (I == NumberOfAuxEntries && !Obj.is64Bit()) break; uintptr_t AuxAddress = XCOFFObjectFile::getAdvancedSymbolEntryAddress( diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp index 2e515110517de..75a00ae22ee41 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOpcodes.cpp @@ -19,6 +19,12 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +// Assume outgoing undef arguments aren't relevant. +// TODO: Maybe skip any trivial constant arguments. +static bool shouldIgnoreArgument(const Value *V) { + return isa(V); +} + static Value *replaceIntrinsic(Module &M, IntrinsicInst *II, Intrinsic::ID NewIID, ArrayRef Tys = None) { @@ -64,8 +70,142 @@ static Value *reduceIntrinsic(Oracle &O, Module &M, IntrinsicInst *II) { } } +/// Look for calls that look like they could be replaced with a load or store. +static bool callLooksLikeLoadStore(CallBase *CB, Value *&DataArg, + Value *&PtrArg) { + const bool IsStore = CB->getType()->isVoidTy(); + + PtrArg = nullptr; + DataArg = nullptr; + for (Value *Arg : CB->args()) { + if (shouldIgnoreArgument(Arg)) + continue; + + if (!Arg->getType()->isSized()) + return false; + + PointerType *PT = dyn_cast(Arg->getType()); + if (!PtrArg && PT) { + // FIXME: Could create bitcast for typed pointers, but roll back unused + // replacement only erases one instruction. + if (!IsStore && !PT->isOpaqueOrPointeeTypeMatches(CB->getType())) + return false; + + PtrArg = Arg; + continue; + } + + if (!IsStore || DataArg) + return false; + + DataArg = Arg; + } + + if (IsStore && !DataArg) { + // FIXME: For typed pointers, use element type? + DataArg = ConstantInt::get(IntegerType::getInt32Ty(CB->getContext()), 0); + } + + // If we didn't find any arguments, we can fill in the pointer. 
+ if (!PtrArg) { + unsigned AS = CB->getModule()->getDataLayout().getAllocaAddrSpace(); + + PointerType *PtrTy = + PointerType::get(DataArg ? DataArg->getType() + : IntegerType::getInt32Ty(CB->getContext()), + AS); + + PtrArg = ConstantPointerNull::get(PtrTy); + } + + // Make sure we don't emit an invalid store with typed pointers. + if (IsStore && DataArg->getType()->getPointerTo( + cast(PtrArg->getType())->getAddressSpace()) != + PtrArg->getType()) + return false; + + return true; +} + +// TODO: Replace 2 pointer argument calls with memcpy +static Value *tryReplaceCallWithLoadStore(Oracle &O, Module &M, CallBase *CB) { + Value *PtrArg = nullptr; + Value *DataArg = nullptr; + if (!callLooksLikeLoadStore(CB, DataArg, PtrArg) || O.shouldKeep()) + return nullptr; + + IRBuilder<> B(CB); + if (DataArg) + return B.CreateStore(DataArg, PtrArg, true); + return B.CreateLoad(CB->getType(), PtrArg, true); +} + +static bool callLooksLikeOperator(CallBase *CB, + SmallVectorImpl &OperatorArgs) { + Type *ReturnTy = CB->getType(); + if (!ReturnTy->isFirstClassType()) + return false; + + for (Value *Arg : CB->args()) { + if (shouldIgnoreArgument(Arg)) + continue; + + if (Arg->getType() != ReturnTy) + return false; + + OperatorArgs.push_back(Arg); + } + + return true; +} + +static Value *tryReplaceCallWithOperator(Oracle &O, Module &M, CallBase *CB) { + SmallVector Arguments; + + if (!callLooksLikeOperator(CB, Arguments) || Arguments.size() > 3) + return nullptr; + + if (O.shouldKeep()) + return nullptr; + + IRBuilder<> B(CB); + if (CB->getType()->isFPOrFPVectorTy()) { + switch (Arguments.size()) { + case 1: + return B.CreateFNeg(Arguments[0]); + case 2: + return B.CreateFMul(Arguments[0], Arguments[1]); + case 3: + return B.CreateIntrinsic(Intrinsic::fma, {CB->getType()}, Arguments); + default: + return nullptr; + } + + llvm_unreachable("all argument sizes handled"); + } + + if (CB->getType()->isIntOrIntVectorTy()) { + switch (Arguments.size()) { + case 1: + return 
B.CreateUnaryIntrinsic(Intrinsic::bswap, Arguments[0]); + case 2: + return B.CreateAnd(Arguments[0], Arguments[1]); + case 3: + return B.CreateIntrinsic(Intrinsic::fshl, {CB->getType()}, Arguments); + default: + return nullptr; + } + + llvm_unreachable("all argument sizes handled"); + } + + return nullptr; +} + static Value *reduceInstruction(Oracle &O, Module &M, Instruction &I) { IRBuilder<> B(&I); + + // TODO: fp binary operator with constant to fneg switch (I.getOpcode()) { case Instruction::FDiv: case Instruction::FRem: @@ -96,6 +236,14 @@ static Value *reduceInstruction(Oracle &O, Module &M, Instruction &I) { if (IntrinsicInst *II = dyn_cast(&I)) return reduceIntrinsic(O, M, II); + CallBase *CB = cast(&I); + + if (Value *NewOp = tryReplaceCallWithOperator(O, M, CB)) + return NewOp; + + if (Value *NewOp = tryReplaceCallWithLoadStore(O, M, CB)) + return NewOp; + return nullptr; } default: diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp index 4fc76cf4d26b5..ec808abce9e7f 100644 --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -85,6 +85,8 @@ static cl::opt PassPipeline( cl::desc( "A textual description of the pass pipeline. To have analysis passes " "available before a certain pass, add 'require'.")); +static cl::alias PassPipeline2("p", cl::aliasopt(PassPipeline), + cl::desc("Alias for -passes")); static cl::opt PrintPasses("print-passes", cl::desc("Print available passes that can be " diff --git a/llvm/unittests/ADT/PackedVectorTest.cpp b/llvm/unittests/ADT/PackedVectorTest.cpp index 24df398934670..b4e017971efac 100644 --- a/llvm/unittests/ADT/PackedVectorTest.cpp +++ b/llvm/unittests/ADT/PackedVectorTest.cpp @@ -8,7 +8,7 @@ // BitVectorTest tests fail on PowerPC for unknown reasons, so disable this // as well since it depends on a BitVector. 
-#ifndef __ppc__ +#ifndef __powerpc__ #include "llvm/ADT/PackedVector.h" #include "gtest/gtest.h" diff --git a/llvm/unittests/Analysis/IVDescriptorsTest.cpp b/llvm/unittests/Analysis/IVDescriptorsTest.cpp index e7948db10ae66..fd9a5a801042c 100644 --- a/llvm/unittests/Analysis/IVDescriptorsTest.cpp +++ b/llvm/unittests/Analysis/IVDescriptorsTest.cpp @@ -203,3 +203,107 @@ TEST(IVDescriptorsTest, LoopWithPtrToInt) { EXPECT_TRUE(IsInductionPHI); }); } + +// This tests that correct identity value is returned for a RecurrenceDescriptor +// that describes FMin reduction idiom. +TEST(IVDescriptorsTest, FMinRednIdentity) { + // Parse the module. + LLVMContext Context; + + std::unique_ptr M = parseIR(Context, + R"(define float @foo(float* %A, i64 %ub) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %fmin = phi float [ 1.000000e+00, %entry ], [ %fmin.next, %for.body ] + %arrayidx = getelementptr inbounds float, float* %A, i64 %i + %ld = load float, float* %arrayidx + %fmin.cmp = fcmp nnan nsz olt float %fmin, %ld + %fmin.next = select nnan nsz i1 %fmin.cmp, float %fmin, float %ld + %i.next = add nsw i64 %i, 1 + %cmp = icmp slt i64 %i.next, %ub + br i1 %cmp, label %for.body, label %for.end + +for.end: + %fmin.lcssa = phi float [ %fmin.next, %for.body ] + ret float %fmin.lcssa +})"); + + runWithLoopInfoAndSE( + *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + Function::iterator FI = F.begin(); + // First basic block is entry - skip it. 
+ BasicBlock *Header = &*(++FI); + assert(Header->getName() == "for.body"); + Loop *L = LI.getLoopFor(Header); + EXPECT_NE(L, nullptr); + BasicBlock::iterator BBI = Header->begin(); + assert((&*BBI)->getName() == "i"); + ++BBI; + PHINode *Phi = dyn_cast(&*BBI); + assert(Phi->getName() == "fmin"); + RecurrenceDescriptor Rdx; + bool IsRdxPhi = RecurrenceDescriptor::isReductionPHI(Phi, L, Rdx); + EXPECT_TRUE(IsRdxPhi); + RecurKind Kind = Rdx.getRecurrenceKind(); + EXPECT_EQ(Kind, RecurKind::FMin); + Type *Ty = Phi->getType(); + Value *Id = Rdx.getRecurrenceIdentity(Kind, Ty, Rdx.getFastMathFlags()); + // Identity value for FP min reduction is +Inf. + EXPECT_EQ(Id, ConstantFP::getInfinity(Ty, false /*Negative*/)); + }); +} + +// This tests that correct identity value is returned for a RecurrenceDescriptor +// that describes FMax reduction idiom. +TEST(IVDescriptorsTest, FMaxRednIdentity) { + // Parse the module. + LLVMContext Context; + + std::unique_ptr M = parseIR(Context, + R"(define float @foo(float* %A, i64 %ub) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %fmax = phi float [ 1.000000e+00, %entry ], [ %fmax.next, %for.body ] + %arrayidx = getelementptr inbounds float, float* %A, i64 %i + %ld = load float, float* %arrayidx + %fmax.cmp = fcmp nnan nsz ogt float %fmax, %ld + %fmax.next = select nnan nsz i1 %fmax.cmp, float %fmax, float %ld + %i.next = add nsw i64 %i, 1 + %cmp = icmp slt i64 %i.next, %ub + br i1 %cmp, label %for.body, label %for.end + +for.end: + %fmax.lcssa = phi float [ %fmax.next, %for.body ] + ret float %fmax.lcssa +})"); + + runWithLoopInfoAndSE( + *M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + Function::iterator FI = F.begin(); + // First basic block is entry - skip it. 
+ BasicBlock *Header = &*(++FI); + assert(Header->getName() == "for.body"); + Loop *L = LI.getLoopFor(Header); + EXPECT_NE(L, nullptr); + BasicBlock::iterator BBI = Header->begin(); + assert((&*BBI)->getName() == "i"); + ++BBI; + PHINode *Phi = dyn_cast(&*BBI); + assert(Phi->getName() == "fmax"); + RecurrenceDescriptor Rdx; + bool IsRdxPhi = RecurrenceDescriptor::isReductionPHI(Phi, L, Rdx); + EXPECT_TRUE(IsRdxPhi); + RecurKind Kind = Rdx.getRecurrenceKind(); + EXPECT_EQ(Kind, RecurKind::FMax); + Type *Ty = Phi->getType(); + Value *Id = Rdx.getRecurrenceIdentity(Kind, Ty, Rdx.getFastMathFlags()); + // Identity value for FP max reduction is -Inf. + EXPECT_EQ(Id, ConstantFP::getInfinity(Ty, true /*Negative*/)); + }); +} diff --git a/llvm/unittests/Analysis/ValueLatticeTest.cpp b/llvm/unittests/Analysis/ValueLatticeTest.cpp index b0b4b5e7bdc1d..ae221811f3fb5 100644 --- a/llvm/unittests/Analysis/ValueLatticeTest.cpp +++ b/llvm/unittests/Analysis/ValueLatticeTest.cpp @@ -23,6 +23,7 @@ namespace { class ValueLatticeTest : public testing::Test { protected: LLVMContext Context; + DataLayout DL = DataLayout(""); }; TEST_F(ValueLatticeTest, ValueLatticeGetters) { @@ -106,42 +107,42 @@ TEST_F(ValueLatticeTest, getCompareIntegers) { auto LV1 = ValueLatticeElement::get(C1); // Check getCompare for equal integer constants. 
- EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV1)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV1)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV1)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV1)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV1)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV1)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV1, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV1, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV1, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV1, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV1, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV1, DL)->isZeroValue()); auto LV2 = ValueLatticeElement::getRange({APInt(32, 10, true), APInt(32, 20, true)}); // Check getCompare with distinct integer ranges. 
- EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2, DL)->isZeroValue()); auto LV3 = ValueLatticeElement::getRange({APInt(32, 15, true), APInt(32, 19, true)}); // Check getCompare with a subset integer ranges. 
- EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SLT, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SLE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_NE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_EQ, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SGE, I1Ty, LV3), nullptr); - EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SGT, I1Ty, LV3), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SLT, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SLE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_NE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_EQ, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SGE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV2.getCompare(CmpInst::ICMP_SGT, I1Ty, LV3, DL), nullptr); auto LV4 = ValueLatticeElement::getRange({APInt(32, 15, true), APInt(32, 25, true)}); // Check getCompare with overlapping integer ranges. - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SLT, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SLE, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_NE, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_EQ, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SGE, I1Ty, LV4), nullptr); - EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SGT, I1Ty, LV4), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SLT, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SLE, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_NE, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_EQ, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SGE, I1Ty, LV4, DL), nullptr); + EXPECT_EQ(LV3.getCompare(CmpInst::ICMP_SGT, I1Ty, LV4, DL), nullptr); } TEST_F(ValueLatticeTest, getCompareFloat) { @@ -152,45 +153,46 @@ TEST_F(ValueLatticeTest, getCompareFloat) { auto LV2 = ValueLatticeElement::get(C1); // Check getCompare for equal 
floating point constants. - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV2)->isOneValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV2)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV2)->isZeroValue()); - EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV2)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV2, DL)->isOneValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV2, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV2, DL)->isZeroValue()); + EXPECT_TRUE(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV2, DL)->isZeroValue()); EXPECT_TRUE( LV1.mergeIn(ValueLatticeElement::get(ConstantFP::get(FloatTy, 2.2)))); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV2), nullptr); - EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV2), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV2, DL), nullptr); } TEST_F(ValueLatticeTest, getCompareUndef) { auto *I32Ty = IntegerType::get(Context, 32); auto *I1Ty = 
IntegerType::get(Context, 1); + // TODO: These results can be improved. auto LV1 = ValueLatticeElement::get(UndefValue::get(I32Ty)); auto LV2 = ValueLatticeElement::getRange({APInt(32, 10, true), APInt(32, 20, true)}); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2))); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SLT, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SLE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_NE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_EQ, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SGE, I1Ty, LV2, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::ICMP_SGT, I1Ty, LV2, DL), nullptr); auto *FloatTy = IntegerType::getFloatTy(Context); auto LV3 = ValueLatticeElement::get(ConstantFP::get(FloatTy, 1.0)); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV3))); - EXPECT_TRUE(isa(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV3))); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OEQ, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_ONE, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OLT, I1Ty, LV3, DL), nullptr); + EXPECT_EQ(LV1.getCompare(CmpInst::FCMP_OGT, I1Ty, LV3, DL), nullptr); } } // end 
anonymous namespace diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index b334755c35c8a..36c70ee1f54bd 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -5504,8 +5504,9 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { OffloadEntriesInfoManager InfoManager; - TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4); + TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0); InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0); + EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo, true)); InfoManager.initializeDeviceGlobalVarEntryInfo( "gvar", OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, 0); InfoManager.registerTargetRegionEntryInfo( @@ -5514,7 +5515,6 @@ TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) { InfoManager.registerDeviceGlobalVarEntryInfo( "gvar", 0x0, 8, OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo, GlobalValue::WeakAnyLinkage, true); - EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo, true)); EXPECT_TRUE(InfoManager.hasDeviceGlobalVarEntryInfo("gvar")); } } // namespace diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp index c80bfa9762663..65cb48a7ab038 100644 --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -731,11 +731,11 @@ TEST(InstructionsTest, CloneCall) { // Test cloning an attribute. 
{ AttrBuilder AB(C); - AB.addAttribute(Attribute::ReadOnly); + AB.addAttribute(Attribute::NoUnwind); Call->setAttributes( AttributeList::get(C, AttributeList::FunctionIndex, AB)); std::unique_ptr Clone(cast(Call->clone())); - EXPECT_TRUE(Clone->onlyReadsMemory()); + EXPECT_TRUE(Clone->doesNotThrow()); } } diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index cebb5973fddc1..23df893957ba1 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -271,5 +271,35 @@ TEST(VerifierTest, AttributesWrongContext) { EXPECT_TRUE(verifyFunction(*F2)); } +TEST(VerifierTest, SwitchInst) { + LLVMContext C; + Module M("M", C); + IntegerType *Int32Ty = Type::getInt32Ty(C); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), {Int32Ty, Int32Ty}, + /*isVarArg=*/false); + Function *F = Function::Create(FTy, Function::ExternalLinkage, "foo", M); + BasicBlock *Entry = BasicBlock::Create(C, "entry", F); + BasicBlock *Default = BasicBlock::Create(C, "default", F); + BasicBlock *OnOne = BasicBlock::Create(C, "on_one", F); + BasicBlock *OnTwo = BasicBlock::Create(C, "on_two", F); + + BasicBlock *Exit = BasicBlock::Create(C, "exit", F); + + BranchInst::Create(Exit, Default); + BranchInst::Create(Exit, OnTwo); + BranchInst::Create(Exit, OnOne); + ReturnInst::Create(C, Exit); + + Value *Cond = F->getArg(0); + SwitchInst *Switch = SwitchInst::Create(Cond, Default, 2, Entry); + Switch->addCase(ConstantInt::get(Int32Ty, 1), OnOne); + Switch->addCase(ConstantInt::get(Int32Ty, 2), OnTwo); + + EXPECT_FALSE(verifyFunction(*F)); + // set one case value to function argument. 
+ Switch->setOperand(2, F->getArg(1)); + EXPECT_TRUE(verifyFunction(*F)); +} + } // end anonymous namespace } // end namespace llvm diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index 5b80572c1650b..10da48c38bef0 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -40,6 +40,7 @@ add_llvm_unittest(SupportTests FileCollectorTest.cpp FileOutputBufferTest.cpp FileUtilitiesTest.cpp + FormatChkTest.cpp FormatVariadicTest.cpp FSUniqueIDTest.cpp GlobPatternTest.cpp diff --git a/llvm/unittests/Support/FormatChkTest.cpp b/llvm/unittests/Support/FormatChkTest.cpp new file mode 100644 index 0000000000000..48023b8e48b81 --- /dev/null +++ b/llvm/unittests/Support/FormatChkTest.cpp @@ -0,0 +1,314 @@ +//===- FormatChkTest.cpp - Unit tests for checked string formatting -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Format.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; + +namespace { + +constexpr auto ST_Unknown = llvm::PrintfStyleFormatReader::ST_Unknown; +constexpr auto ST_WideChar = llvm::PrintfStyleFormatReader::ST_WideChar; +constexpr auto ST_Int = llvm::PrintfStyleFormatReader::ST_Int; +constexpr auto ST_Long = llvm::PrintfStyleFormatReader::ST_Long; +constexpr auto ST_LongLong = llvm::PrintfStyleFormatReader::ST_LongLong; +constexpr auto ST_IntMax = llvm::PrintfStyleFormatReader::ST_IntMax; +constexpr auto ST_Size = llvm::PrintfStyleFormatReader::ST_Size; +constexpr auto ST_Ptrdiff = llvm::PrintfStyleFormatReader::ST_Ptrdiff; +constexpr auto ST_Double = llvm::PrintfStyleFormatReader::ST_Double; +constexpr auto ST_LongDouble = llvm::PrintfStyleFormatReader::ST_LongDouble; +constexpr auto ST_CString = llvm::PrintfStyleFormatReader::ST_CString; +constexpr auto ST_WideCString = llvm::PrintfStyleFormatReader::ST_WideCString; +constexpr auto ST_VoidPointer = llvm::PrintfStyleFormatReader::ST_VoidPointer; +constexpr auto ST_Count_Char = llvm::PrintfStyleFormatReader::ST_Count_Char; +constexpr auto ST_Count_Short = llvm::PrintfStyleFormatReader::ST_Count_Short; +constexpr auto ST_Count_Int = llvm::PrintfStyleFormatReader::ST_Count_Int; +constexpr auto ST_Count_Long = llvm::PrintfStyleFormatReader::ST_Count_Long; +constexpr auto ST_Count_LongLong = + llvm::PrintfStyleFormatReader::ST_Count_LongLong; +constexpr auto ST_Count_IntMax = llvm::PrintfStyleFormatReader::ST_Count_IntMax; +constexpr auto ST_Count_Size = llvm::PrintfStyleFormatReader::ST_Count_Size; +constexpr auto ST_Count_Ptrdiff = + llvm::PrintfStyleFormatReader::ST_Count_Ptrdiff; + +using STVec = std::vector; + +STVec ParseFormatString(const char *Fmt) { + STVec Result; + PrintfStyleFormatReader Reader(Fmt); + while (auto Spec = 
Reader.nextSpecifier()) { + Result.push_back(Spec); + } + return Result; +} + +#define EXPECT_FMT_EQ(FMT, ...) \ + EXPECT_EQ(ParseFormatString(FMT), STVec({__VA_ARGS__})) + +} // namespace + +TEST(FormatReader, EmptyFormatString) { + EXPECT_EQ(ParseFormatString(""), + std::vector()); +} + +TEST(FormatReader, PercentEscape) { + EXPECT_EQ(ParseFormatString("%%"), + std::vector()); +} + +TEST(FormatReader, PercentAtEnd) { EXPECT_FMT_EQ("%", ST_Unknown); } + +TEST(FormatReader, PercentWithWidth) { EXPECT_FMT_EQ("%ll%", ST_Unknown); } + +TEST(FormatReader, OneFormat) { + EXPECT_FMT_EQ("%i xx", ST_Int); + EXPECT_FMT_EQ("yy %i", ST_Int); + EXPECT_FMT_EQ("yy %i xx", ST_Int); +} + +TEST(FormatReader, TwoFormats) { + EXPECT_FMT_EQ("%i yy %f xx", ST_Int, ST_Double); + EXPECT_FMT_EQ("zz %i yy %f", ST_Int, ST_Double); + EXPECT_FMT_EQ("zz %i yy %f xx", ST_Int, ST_Double); +} + +TEST(FormatReader, PoundFlagValid) { + EXPECT_FMT_EQ("%#x", ST_Int); + EXPECT_FMT_EQ("%#X", ST_Int); + EXPECT_FMT_EQ("%#a", ST_Double); + EXPECT_FMT_EQ("%#A", ST_Double); + EXPECT_FMT_EQ("%#e", ST_Double); + EXPECT_FMT_EQ("%#E", ST_Double); + EXPECT_FMT_EQ("%#f", ST_Double); + EXPECT_FMT_EQ("%#F", ST_Double); + EXPECT_FMT_EQ("%#g", ST_Double); + EXPECT_FMT_EQ("%#G", ST_Double); + + EXPECT_FMT_EQ("%#p", ST_Unknown); + EXPECT_FMT_EQ("%#i", ST_Unknown); + EXPECT_FMT_EQ("%#c", ST_Unknown); + EXPECT_FMT_EQ("%#s", ST_Unknown); + EXPECT_FMT_EQ("%#d", ST_Unknown); + EXPECT_FMT_EQ("%#u", ST_Unknown); + EXPECT_FMT_EQ("%#o", ST_Unknown); + EXPECT_FMT_EQ("%#n", ST_Unknown); +} + +TEST(FormatReader, ZeroFlagValid) { + EXPECT_FMT_EQ("%0x", ST_Int); + EXPECT_FMT_EQ("%0X", ST_Int); + EXPECT_FMT_EQ("%0i", ST_Int); + EXPECT_FMT_EQ("%0d", ST_Int); + EXPECT_FMT_EQ("%0u", ST_Int); + EXPECT_FMT_EQ("%0o", ST_Int); + EXPECT_FMT_EQ("%0a", ST_Double); + EXPECT_FMT_EQ("%0A", ST_Double); + EXPECT_FMT_EQ("%0e", ST_Double); + EXPECT_FMT_EQ("%0E", ST_Double); + EXPECT_FMT_EQ("%0f", ST_Double); + EXPECT_FMT_EQ("%0F", ST_Double); + 
EXPECT_FMT_EQ("%0g", ST_Double); + EXPECT_FMT_EQ("%0G", ST_Double); + + EXPECT_FMT_EQ("%0p", ST_Unknown); + EXPECT_FMT_EQ("%0n", ST_Unknown); + EXPECT_FMT_EQ("%0c", ST_Unknown); + EXPECT_FMT_EQ("%0s", ST_Unknown); +} + +TEST(FormatReader, PrecisionValid) { + EXPECT_FMT_EQ("%.1x", ST_Int); + EXPECT_FMT_EQ("%.1X", ST_Int); + EXPECT_FMT_EQ("%.1i", ST_Int); + EXPECT_FMT_EQ("%.1d", ST_Int); + EXPECT_FMT_EQ("%.1u", ST_Int); + EXPECT_FMT_EQ("%.1o", ST_Int); + EXPECT_FMT_EQ("%.1a", ST_Double); + EXPECT_FMT_EQ("%.1A", ST_Double); + EXPECT_FMT_EQ("%.1e", ST_Double); + EXPECT_FMT_EQ("%.1E", ST_Double); + EXPECT_FMT_EQ("%.1f", ST_Double); + EXPECT_FMT_EQ("%.1F", ST_Double); + EXPECT_FMT_EQ("%.1g", ST_Double); + EXPECT_FMT_EQ("%.1G", ST_Double); + EXPECT_FMT_EQ("%.1s", ST_CString); + + EXPECT_FMT_EQ("%.1p", ST_Unknown); + EXPECT_FMT_EQ("%.1n", ST_Unknown); + EXPECT_FMT_EQ("%.1c", ST_Unknown); +} + +TEST(FormatReader, LongWidth) { + EXPECT_FMT_EQ("%1li", ST_Long); + EXPECT_FMT_EQ("%11li", ST_Long); + EXPECT_FMT_EQ("%1111li", ST_Long); + EXPECT_FMT_EQ("%10li", ST_Long); + EXPECT_FMT_EQ("%*li", ST_Int, ST_Long); + EXPECT_FMT_EQ("%*l!", ST_Unknown); +} + +TEST(FormatReader, LongPrecision) { + EXPECT_FMT_EQ("%.1li", ST_Long); + EXPECT_FMT_EQ("%.11li", ST_Long); + EXPECT_FMT_EQ("%.1111li", ST_Long); + EXPECT_FMT_EQ("%.10li", ST_Long); + EXPECT_FMT_EQ("%.*li", ST_Int, ST_Long); + EXPECT_FMT_EQ("%.*l!", ST_Unknown); + + EXPECT_FMT_EQ("%1.1li", ST_Long); + EXPECT_FMT_EQ("%11.11li", ST_Long); + EXPECT_FMT_EQ("%111.1111li", ST_Long); + EXPECT_FMT_EQ("%110.10li", ST_Long); + EXPECT_FMT_EQ("%1.*li", ST_Int, ST_Long); + EXPECT_FMT_EQ("%1.*l!", ST_Unknown); + + EXPECT_FMT_EQ("%*.*li", ST_Int, ST_Int, ST_Long); + EXPECT_FMT_EQ("%*.*l!", ST_Unknown); +} + +TEST(FormatReader, IntSpecifiers) { + EXPECT_FMT_EQ("%hhi", ST_Int); + EXPECT_FMT_EQ("%hhd", ST_Int); + EXPECT_FMT_EQ("%hi", ST_Int); + EXPECT_FMT_EQ("%hd", ST_Int); + EXPECT_FMT_EQ("%i", ST_Int); + EXPECT_FMT_EQ("%d", ST_Int); + 
EXPECT_FMT_EQ("%li", ST_Long); + EXPECT_FMT_EQ("%ld", ST_Long); + EXPECT_FMT_EQ("%lli", ST_LongLong); + EXPECT_FMT_EQ("%lld", ST_LongLong); + EXPECT_FMT_EQ("%ji", ST_IntMax); + EXPECT_FMT_EQ("%jd", ST_IntMax); + EXPECT_FMT_EQ("%zi", ST_Size); + EXPECT_FMT_EQ("%zd", ST_Size); + EXPECT_FMT_EQ("%ti", ST_Ptrdiff); + EXPECT_FMT_EQ("%td", ST_Ptrdiff); + + EXPECT_FMT_EQ("%Li", ST_Unknown); + EXPECT_FMT_EQ("%Ld", ST_Unknown); +} + +TEST(FormatReader, UIntSpecifiers) { + EXPECT_FMT_EQ("%hhu", ST_Int); + EXPECT_FMT_EQ("%hho", ST_Int); + EXPECT_FMT_EQ("%hhx", ST_Int); + EXPECT_FMT_EQ("%hhX", ST_Int); + EXPECT_FMT_EQ("%hu", ST_Int); + EXPECT_FMT_EQ("%ho", ST_Int); + EXPECT_FMT_EQ("%hx", ST_Int); + EXPECT_FMT_EQ("%hX", ST_Int); + EXPECT_FMT_EQ("%u", ST_Int); + EXPECT_FMT_EQ("%o", ST_Int); + EXPECT_FMT_EQ("%x", ST_Int); + EXPECT_FMT_EQ("%X", ST_Int); + EXPECT_FMT_EQ("%lu", ST_Long); + EXPECT_FMT_EQ("%lo", ST_Long); + EXPECT_FMT_EQ("%lx", ST_Long); + EXPECT_FMT_EQ("%lX", ST_Long); + EXPECT_FMT_EQ("%llu", ST_LongLong); + EXPECT_FMT_EQ("%llo", ST_LongLong); + EXPECT_FMT_EQ("%llx", ST_LongLong); + EXPECT_FMT_EQ("%llX", ST_LongLong); + EXPECT_FMT_EQ("%ju", ST_IntMax); + EXPECT_FMT_EQ("%jo", ST_IntMax); + EXPECT_FMT_EQ("%jx", ST_IntMax); + EXPECT_FMT_EQ("%jX", ST_IntMax); + EXPECT_FMT_EQ("%zu", ST_Size); + EXPECT_FMT_EQ("%zo", ST_Size); + EXPECT_FMT_EQ("%zx", ST_Size); + EXPECT_FMT_EQ("%zX", ST_Size); + EXPECT_FMT_EQ("%tu", ST_Ptrdiff); + EXPECT_FMT_EQ("%to", ST_Ptrdiff); + EXPECT_FMT_EQ("%tx", ST_Ptrdiff); + EXPECT_FMT_EQ("%tX", ST_Ptrdiff); + + EXPECT_FMT_EQ("%Lu", ST_Unknown); + EXPECT_FMT_EQ("%Lo", ST_Unknown); + EXPECT_FMT_EQ("%Lx", ST_Unknown); + EXPECT_FMT_EQ("%LX", ST_Unknown); +} + +TEST(FormatReader, FloatSpecifiers) { + EXPECT_FMT_EQ("%a", ST_Double); + EXPECT_FMT_EQ("%e", ST_Double); + EXPECT_FMT_EQ("%f", ST_Double); + EXPECT_FMT_EQ("%g", ST_Double); + EXPECT_FMT_EQ("%la", ST_Double); + EXPECT_FMT_EQ("%le", ST_Double); + EXPECT_FMT_EQ("%lf", ST_Double); + 
EXPECT_FMT_EQ("%lg", ST_Double); + + EXPECT_FMT_EQ("%La", ST_LongDouble); + EXPECT_FMT_EQ("%Le", ST_LongDouble); + EXPECT_FMT_EQ("%Lf", ST_LongDouble); + EXPECT_FMT_EQ("%Lg", ST_LongDouble); + + EXPECT_FMT_EQ("%ha", ST_Unknown); + EXPECT_FMT_EQ("%he", ST_Unknown); + EXPECT_FMT_EQ("%hf", ST_Unknown); + EXPECT_FMT_EQ("%hg", ST_Unknown); + EXPECT_FMT_EQ("%hha", ST_Unknown); + EXPECT_FMT_EQ("%hhe", ST_Unknown); + EXPECT_FMT_EQ("%hhf", ST_Unknown); + EXPECT_FMT_EQ("%hhg", ST_Unknown); + EXPECT_FMT_EQ("%lla", ST_Unknown); + EXPECT_FMT_EQ("%lle", ST_Unknown); + EXPECT_FMT_EQ("%llf", ST_Unknown); + EXPECT_FMT_EQ("%llg", ST_Unknown); +} + +TEST(FormatReader, CharSpecifiers) { + EXPECT_FMT_EQ("%hhc", ST_Unknown); + EXPECT_FMT_EQ("%hc", ST_Unknown); + EXPECT_FMT_EQ("%c", ST_Int); + EXPECT_FMT_EQ("%lc", ST_WideChar); + EXPECT_FMT_EQ("%llc", ST_Unknown); + EXPECT_FMT_EQ("%jc", ST_Unknown); + EXPECT_FMT_EQ("%zc", ST_Unknown); + EXPECT_FMT_EQ("%tc", ST_Unknown); + EXPECT_FMT_EQ("%Lc", ST_Unknown); +} + +TEST(FormatReader, StringSpecifiers) { + EXPECT_FMT_EQ("%hhs", ST_Unknown); + EXPECT_FMT_EQ("%hs", ST_Unknown); + EXPECT_FMT_EQ("%s", ST_CString); + EXPECT_FMT_EQ("%ls", ST_WideCString); + EXPECT_FMT_EQ("%lls", ST_Unknown); + EXPECT_FMT_EQ("%js", ST_Unknown); + EXPECT_FMT_EQ("%zs", ST_Unknown); + EXPECT_FMT_EQ("%ts", ST_Unknown); + EXPECT_FMT_EQ("%Ls", ST_Unknown); +} + +TEST(FormatReader, VoidPointerSpecifiers) { + EXPECT_FMT_EQ("%hhp", ST_Unknown); + EXPECT_FMT_EQ("%hp", ST_Unknown); + EXPECT_FMT_EQ("%p", ST_VoidPointer); + EXPECT_FMT_EQ("%lp", ST_Unknown); + EXPECT_FMT_EQ("%llp", ST_Unknown); + EXPECT_FMT_EQ("%jp", ST_Unknown); + EXPECT_FMT_EQ("%zp", ST_Unknown); + EXPECT_FMT_EQ("%tp", ST_Unknown); + EXPECT_FMT_EQ("%Lp", ST_Unknown); +} + +TEST(FormatReader, CountSpecifiers) { + EXPECT_FMT_EQ("%hhn", ST_Count_Char); + EXPECT_FMT_EQ("%hn", ST_Count_Short); + EXPECT_FMT_EQ("%n", ST_Count_Int); + EXPECT_FMT_EQ("%ln", ST_Count_Long); + EXPECT_FMT_EQ("%lln", ST_Count_LongLong); + 
EXPECT_FMT_EQ("%jn", ST_Count_IntMax); + EXPECT_FMT_EQ("%zn", ST_Count_Size); + EXPECT_FMT_EQ("%tn", ST_Count_Ptrdiff); + EXPECT_FMT_EQ("%Ln", ST_Unknown); +} diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index 805084ac1f295..dd4e5fcd0fa22 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -1016,6 +1016,19 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_FLAGM | AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_BF16, "9-A"), + ARMCPUTestParams("cortex-a715", "armv9-a", "neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_FP | AArch64::AEK_BF16 | + AArch64::AEK_SIMD | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_RDM | + AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_MTE | AArch64::AEK_PAUTH | + AArch64::AEK_SVE | AArch64::AEK_SVE2 | + AArch64::AEK_SVE2BITPERM | AArch64::AEK_SSBS | + AArch64::AEK_SB | AArch64::AEK_I8MM | + AArch64::AEK_PERFMON | AArch64::AEK_PREDRES | + AArch64::AEK_PROFILE | AArch64::AEK_FP16FML | + AArch64::AEK_FP16 | AArch64::AEK_FLAGM, + "9-A"), ARMCPUTestParams( "neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | @@ -1296,7 +1309,7 @@ INSTANTIATE_TEST_SUITE_P( "8.2-A"))); // Note: number of CPUs includes aliases. 
-static constexpr unsigned NumAArch64CPUArchs = 59; +static constexpr unsigned NumAArch64CPUArchs = 60; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector List; @@ -1506,7 +1519,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::AEK_BRBE, AArch64::AEK_PAUTH, AArch64::AEK_FLAGM, AArch64::AEK_SME, AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64, AArch64::AEK_SME2, AArch64::AEK_HBC, AArch64::AEK_MOPS, - AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1}; + AArch64::AEK_PERFMON, AArch64::AEK_SVE2p1, AArch64::AEK_SME2p1, + AArch64::AEK_B16B16}; std::vector Features; @@ -1546,6 +1560,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2p1")); + EXPECT_TRUE(llvm::is_contained(Features, "+b16b16")); EXPECT_TRUE(llvm::is_contained(Features, "+rcpc")); EXPECT_TRUE(llvm::is_contained(Features, "+rand")); EXPECT_TRUE(llvm::is_contained(Features, "+mte")); @@ -1565,6 +1580,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sme-f64f64")); EXPECT_TRUE(llvm::is_contained(Features, "+sme-i16i64")); EXPECT_TRUE(llvm::is_contained(Features, "+sme2")); + EXPECT_TRUE(llvm::is_contained(Features, "+sme2p1")); EXPECT_TRUE(llvm::is_contained(Features, "+hbc")); EXPECT_TRUE(llvm::is_contained(Features, "+mops")); EXPECT_TRUE(llvm::is_contained(Features, "+perfmon")); @@ -1628,6 +1644,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"}, {"sve2-sha3", "nosve2-sha3", "+sve2-sha3", "-sve2-sha3"}, {"sve2p1", "nosve2p1", "+sve2p1", "-sve2p1"}, + {"b16b16", "nob16b16", "+b16b16", "-b16b16"}, {"sve2-bitperm", "nosve2-bitperm", "+sve2-bitperm", "-sve2-bitperm"}, {"dotprod", "nodotprod", "+dotprod", "-dotprod"}, {"rcpc", "norcpc", "+rcpc", "-rcpc"}, @@ -1645,6 +1662,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { 
{"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"}, {"sme-i16i64", "nosme-i16i64", "+sme-i16i64", "-sme-i16i64"}, {"sme2", "nosme2", "+sme2", "-sme2"}, + {"sme2p1", "nosme2p1", "+sme2p1", "-sme2p1"}, {"hbc", "nohbc", "+hbc", "-hbc"}, {"mops", "nomops", "+mops", "-mops"}, {"pmuv3", "nopmuv3", "+perfmon", "-perfmon"}, diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp index 8384a8ceab625..fba168f6e6981 100644 --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -91,6 +91,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) { case MVT::v256i1: return "MVT::v256i1"; case MVT::v512i1: return "MVT::v512i1"; case MVT::v1024i1: return "MVT::v1024i1"; + case MVT::v2048i1: return "MVT::v2048i1"; case MVT::v128i2: return "MVT::v128i2"; case MVT::v256i2: return "MVT::v256i2"; case MVT::v64i4: return "MVT::v64i4"; diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 05d81bae0a9d3..f46cb4c77f6dd 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -1906,6 +1906,69 @@ void parseVarLenInstOperand(const Record &Def, } } +static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits, + std::map &TiedNames, + StringRef OpName, OperandInfo &OpInfo) { + // Some bits of the operand may be required to be 1 depending on the + // instruction's encoding. Collect those bits. 
+ if (const RecordVal *EncodedValue = EncodingDef.getValue(OpName)) + if (const BitsInit *OpBits = dyn_cast(EncodedValue->getValue())) + for (unsigned I = 0; I < OpBits->getNumBits(); ++I) + if (const BitInit *OpBit = dyn_cast(OpBits->getBit(I))) + if (OpBit->getValue()) + OpInfo.InitValue |= 1ULL << I; + + unsigned Base = ~0U; + unsigned Width = 0; + unsigned Offset = 0; + + for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) { + VarInit *Var = nullptr; + VarBitInit *BI = dyn_cast(Bits.getBit(bi)); + if (BI) + Var = dyn_cast(BI->getBitVar()); + else + Var = dyn_cast(Bits.getBit(bi)); + + if (!Var) { + if (Base != ~0U) { + OpInfo.addField(Base, Width, Offset); + Base = ~0U; + Width = 0; + Offset = 0; + } + continue; + } + + if ((Var->getName() != OpName && + Var->getName() != TiedNames[std::string(OpName)])) { + if (Base != ~0U) { + OpInfo.addField(Base, Width, Offset); + Base = ~0U; + Width = 0; + Offset = 0; + } + continue; + } + + if (Base == ~0U) { + Base = bi; + Width = 1; + Offset = BI ? BI->getBitNum() : 0; + } else if (BI && BI->getBitNum() != Offset + Width) { + OpInfo.addField(Base, Width, Offset); + Base = bi; + Width = 1; + Offset = BI->getBitNum(); + } else { + ++Width; + } + } + + if (Base != ~0U) + OpInfo.addField(Base, Width, Offset); +} + static unsigned populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, const CodeGenInstruction &CGI, unsigned Opc, @@ -2119,21 +2182,24 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, // For each operand, see if we can figure out where it is encoded. 
for (const auto &Op : InOutOperands) { + Init *OpInit = Op.first; + StringRef OpName = Op.second; + if (SupportPositionalDecoding) { - if (!NumberedInsnOperands[std::string(Op.second)].empty()) { + if (!NumberedInsnOperands[std::string(OpName)].empty()) { llvm::append_range(InsnOperands, - NumberedInsnOperands[std::string(Op.second)]); + NumberedInsnOperands[std::string(OpName)]); continue; } - if (!NumberedInsnOperands[TiedNames[std::string(Op.second)]].empty()) { + if (!NumberedInsnOperands[TiedNames[std::string(OpName)]].empty()) { if (!NumberedInsnOperandsNoTie.count( - TiedNames[std::string(Op.second)])) { + TiedNames[std::string(OpName)])) { // Figure out to which (sub)operand we're tied. unsigned i = - CGI.Operands.getOperandNamed(TiedNames[std::string(Op.second)]); + CGI.Operands.getOperandNamed(TiedNames[std::string(OpName)]); int tiedTo = CGI.Operands[i].getTiedRegister(); if (tiedTo == -1) { - i = CGI.Operands.getOperandNamed(Op.second); + i = CGI.Operands.getOperandNamed(OpName); tiedTo = CGI.Operands[i].getTiedRegister(); } @@ -2142,7 +2208,7 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, CGI.Operands.getSubOperandNumber(tiedTo); InsnOperands.push_back( - NumberedInsnOperands[TiedNames[std::string(Op.second)]] + NumberedInsnOperands[TiedNames[std::string(OpName)]] [SO.second]); } } @@ -2154,76 +2220,15 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef, // to interpret it. As a first step, require the target to provide // callbacks for decoding register classes. - Init *OpInit = Op.first; if (DagInit *Dag = dyn_cast(OpInit)) OpInit = Dag->getOperator(); OperandInfo OpInfo = getOpInfo(cast(OpInit)->getDef()); - // Some bits of the operand may be required to be 1 depending on the - // instruction's encoding. Collect those bits. 
- if (const RecordVal *EncodedValue = EncodingDef.getValue(Op.second)) - if (const BitsInit *OpBits = - dyn_cast(EncodedValue->getValue())) - for (unsigned I = 0; I < OpBits->getNumBits(); ++I) - if (const BitInit *OpBit = dyn_cast(OpBits->getBit(I))) - if (OpBit->getValue()) - OpInfo.InitValue |= 1ULL << I; - - unsigned Base = ~0U; - unsigned Width = 0; - unsigned Offset = 0; - - for (unsigned bi = 0; bi < Bits.getNumBits(); ++bi) { - VarInit *Var = nullptr; - VarBitInit *BI = dyn_cast(Bits.getBit(bi)); - if (BI) - Var = dyn_cast(BI->getBitVar()); - else - Var = dyn_cast(Bits.getBit(bi)); - - if (!Var) { - if (Base != ~0U) { - OpInfo.addField(Base, Width, Offset); - Base = ~0U; - Width = 0; - Offset = 0; - } - continue; - } - - if ((Var->getName() != Op.second && - Var->getName() != TiedNames[std::string(Op.second)])) { - if (Base != ~0U) { - OpInfo.addField(Base, Width, Offset); - Base = ~0U; - Width = 0; - Offset = 0; - } - continue; - } - - if (Base == ~0U) { - Base = bi; - Width = 1; - Offset = BI ? 
BI->getBitNum() : 0; - } else if (BI && BI->getBitNum() != Offset + Width) { - OpInfo.addField(Base, Width, Offset); - Base = bi; - Width = 1; - Offset = BI->getBitNum(); - } else { - ++Width; - } - } - - if (Base != ~0U) - OpInfo.addField(Base, Width, Offset); - + addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo); if (OpInfo.numFields() > 0) InsnOperands.push_back(OpInfo); } } - Operands[Opc] = InsnOperands; #if 0 diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp index 8be32d2effa6e..56082f374ae80 100644 --- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp +++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.cpp @@ -61,10 +61,11 @@ void GIMatchDag::writeDOTGraph(raw_ostream &OS, StringRef ID) const { const char *ToFmt = "Node%p:d%d:s"; if (E->getFromMO()->isDef() && !E->getToMO()->isDef()) std::swap(FromFmt, ToFmt); - auto From = format(FromFmt, E->getFromMI(), E->getFromMO()->getIdx()); - auto To = format(ToFmt, E->getToMI(), E->getToMO()->getIdx()); - if (E->getFromMO()->isDef() && !E->getToMO()->isDef()) - std::swap(From, To); + auto FromF = format(FromFmt, E->getFromMI(), E->getFromMO()->getIdx()); + auto ToF = format(ToFmt, E->getToMI(), E->getToMO()->getIdx()); + bool Swap = E->getFromMO()->isDef() && !E->getToMO()->isDef(); + auto &From = Swap ? ToF : FromF; + auto &To = Swap ? 
FromF : ToF; OS << " " << From << " -> " << To << " [label=\"$" << E->getName(); if (E->getFromMO()->isDef() == E->getToMO()->isDef()) diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index ec3023f843db4..a6d7e2ce949bf 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -776,49 +776,53 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints, case CodeGenIntrinsic::NoMem: if (Intrinsic.hasSideEffects) break; - OS << " Attribute::get(C, Attribute::ReadNone),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::none()),\n"; break; case CodeGenIntrinsic::ReadArgMem: - OS << " Attribute::get(C, Attribute::ReadOnly),\n"; - OS << " Attribute::get(C, Attribute::ArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::argMemOnly(ModRefInfo::Ref)),\n"; break; case CodeGenIntrinsic::ReadMem: - OS << " Attribute::get(C, Attribute::ReadOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::readOnly()),\n"; break; case CodeGenIntrinsic::ReadInaccessibleMem: - OS << " Attribute::get(C, Attribute::ReadOnly),\n"; - OS << " Attribute::get(C, Attribute::InaccessibleMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleMemOnly(ModRefInfo::Ref)),\n"; break; case CodeGenIntrinsic::ReadInaccessibleMemOrArgMem: - OS << " Attribute::get(C, Attribute::ReadOnly),\n"; - OS << " Attribute::get(C, " - << "Attribute::InaccessibleMemOrArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleOrArgMemOnly(ModRefInfo::Ref)),\n"; + break; break; case CodeGenIntrinsic::WriteArgMem: - OS << " Attribute::get(C, Attribute::WriteOnly),\n"; - OS << " Attribute::get(C, Attribute::ArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::argMemOnly(ModRefInfo::Mod)),\n"; break; case CodeGenIntrinsic::WriteMem: - OS 
<< " Attribute::get(C, Attribute::WriteOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::writeOnly()),\n"; break; case CodeGenIntrinsic::WriteInaccessibleMem: - OS << " Attribute::get(C, Attribute::WriteOnly),\n"; - OS << " Attribute::get(C, Attribute::InaccessibleMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleMemOnly(ModRefInfo::Mod)),\n"; break; case CodeGenIntrinsic::WriteInaccessibleMemOrArgMem: - OS << " Attribute::get(C, Attribute::WriteOnly),\n"; - OS << " Attribute::get(C, " - << "Attribute::InaccessibleMemOrArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleOrArgMemOnly(ModRefInfo::Mod)),\n"; break; case CodeGenIntrinsic::ReadWriteArgMem: - OS << " Attribute::get(C, Attribute::ArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::argMemOnly(ModRefInfo::ModRef)),\n"; break; case CodeGenIntrinsic::ReadWriteInaccessibleMem: - OS << " Attribute::get(C, Attribute::InaccessibleMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)),\n"; break; case CodeGenIntrinsic::ReadWriteInaccessibleMemOrArgMem: - OS << " Attribute::get(C, " - << "Attribute::InaccessibleMemOrArgMemOnly),\n"; + OS << " Attribute::getWithMemoryEffects(C, " + << "MemoryEffects::inaccessibleOrArgMemOnly(ModRefInfo::ModRef)),\n"; break; case CodeGenIntrinsic::ReadWriteMem: break; diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 127a7404431ed..6ed9d4aac4b76 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -327,7 +327,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False): UNUSED_NOTE = 'NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line:' OPT_FUNCTION_RE = re.compile( - r'^(\s*;\s*Function\sAttrs:\s(?P[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P[\w.$-]+?)\s*' + r'^(\s*;\s*Function\sAttrs:\s(?P[\w\s():,]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P[\w.$-]+?)\s*' r'(?P\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P.*?)^\}$', flags=(re.M | re.S)) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn index 535d87d9f12dd..b8e99c34ca3c2 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn @@ -112,12 +112,12 @@ static_library("clangd") { "Preamble.cpp", "Protocol.cpp", "Quality.cpp", - "QueryDriverDatabase.cpp", "RIFF.cpp", "Selection.cpp", "SemanticHighlighting.cpp", "SemanticSelection.cpp", "SourceCode.cpp", + "SystemIncludeExtractor.cpp", "TUScheduler.cpp", "TidyProvider.cpp", "URI.cpp", diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index c9234ab175973..e7976a0f9b231 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -465,6 +465,7 @@ if (current_toolchain == default_toolchain) { "__memory/allocation_guard.h", "__memory/allocator.h", "__memory/allocator_arg_t.h", + "__memory/allocator_destructor.h", "__memory/allocator_traits.h", "__memory/assume_aligned.h", "__memory/auto_ptr.h", diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index b5a393afda93a..397b309d524bb 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -84,6 +84,7 @@ static_library("Support") { "FileOutputBuffer.cpp", "FileUtilities.cpp", "FoldingSet.cpp", + "Format.cpp", "FormatVariadic.cpp", "FormattedStream.cpp", "GlobPattern.cpp", diff --git 
a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index 4766c00f8c85d..619020ca8c43d 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -44,6 +44,7 @@ unittest("SupportTests") { "FileCollectorTest.cpp", "FileOutputBufferTest.cpp", "FileUtilitiesTest.cpp", + "FormatChkTest.cpp", "FormatVariadicTest.cpp", "GlobPatternTest.cpp", "HashBuilderTest.cpp", diff --git a/llvm/utils/release/test-release.sh b/llvm/utils/release/test-release.sh index 9208161c7da1b..cb01ed5bd8843 100755 --- a/llvm/utils/release/test-release.sh +++ b/llvm/utils/release/test-release.sh @@ -12,6 +12,7 @@ #===------------------------------------------------------------------------===# System=`uname -s` +Machine=`uname -m` if [ "$System" = "FreeBSD" ]; then MAKE=gmake else @@ -35,7 +36,6 @@ do_libcxxabi="yes" do_libunwind="yes" do_test_suite="yes" do_openmp="yes" -do_bolt="no" do_lld="yes" do_lldb="yes" do_polly="yes" @@ -47,6 +47,15 @@ ExtraConfigureFlags="" ExportBranch="" git_ref="" +do_bolt="no" +if [ "$System" = "Linux" ]; then + case $Machine in + x86_64 | arm64 | aarch64 ) + do_bolt="yes" + ;; + esac +fi + function usage() { echo "usage: `basename $0` -release X.Y.Z -rc NUM [OPTIONS]" echo "" diff --git a/mlir/docs/AttributesAndTypes.md b/mlir/docs/AttributesAndTypes.md index 7e54c2ee0cd1b..d19b1bf443ad7 100644 --- a/mlir/docs/AttributesAndTypes.md +++ b/mlir/docs/AttributesAndTypes.md @@ -959,6 +959,8 @@ User defined storage classes must adhere to the following: - Provide a method to hash an instance of the `KeyTy`. (Note: This is not necessary if an `llvm::DenseMapInfo` specialization exists) - `static llvm::hash_code hashKey(const KeyTy &)` +- Provide a method to generate the `KeyTy` from an instance of the storage class. 
+ - `KeyTy getAsKey() const` Let's look at an example: @@ -997,6 +999,11 @@ struct ComplexTypeStorage : public TypeStorage { ComplexTypeStorage(key.first, key.second); } + /// Construct an instance of the key from this storage class. + KeyTy getAsKey() const { + return KeyTy(nonZeroParam, integerType); + } + /// The parametric data held by the storage class. unsigned nonZeroParam; Type integerType; diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md index fc2391dd1ed92..4b138ca23d5d6 100644 --- a/mlir/docs/Dialects/GPU.md +++ b/mlir/docs/Dialects/GPU.md @@ -12,6 +12,8 @@ manipulations to launch a GPU kernel and provide a simple path towards GPU execution from MLIR. It may be targeted, for example, by DSLs using MLIR. The dialect uses `gpu` as its canonical prefix. +[TOC] + ## Memory attribution Memory buffers are defined at the function level, either in "gpu.launch" or in diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index 7748448ba1df4..9842cb9214461 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -602,7 +602,7 @@ A pipeline view that models the structure of the pass manager, this is the default view: ```shell -$ mlir-opt -pass-pipeline='func.func(my-pass,my-pass)' foo.mlir -mlir-pass-statistics +$ mlir-opt -pass-pipeline='any(func.func(my-pass,my-pass))' foo.mlir -mlir-pass-statistics ===-------------------------------------------------------------------------=== ... Pass statistics report ... @@ -621,7 +621,7 @@ A list view that aggregates the statistics of all instances of a specific pass together: ```shell -$ mlir-opt -pass-pipeline='func.func(my-pass, my-pass)' foo.mlir -mlir-pass-statistics -mlir-pass-statistics-display=list +$ mlir-opt -pass-pipeline='any(func.func(my-pass,my-pass))' foo.mlir -mlir-pass-statistics -mlir-pass-statistics-display=list ===-------------------------------------------------------------------------=== ... Pass statistics report ... 
@@ -750,10 +750,10 @@ Can also be specified as (via the `-pass-pipeline` flag): ```shell # Anchor the cse and canonicalize passes on the `func.func` operation. -$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}' +$ mlir-opt foo.mlir -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1})' # Anchor the cse and canonicalize passes on "any" viable root operation. -$ mlir-opt foo.mlir -pass-pipeline='any(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}' +$ mlir-opt foo.mlir -pass-pipeline='builtin.module(any(cse,canonicalize),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1})' ``` In order to support round-tripping a pass to the textual representation using @@ -1121,7 +1121,7 @@ pipeline. This display mode is available in mlir-opt via `-mlir-timing-display=list`. ```shell -$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing -mlir-timing-display=list +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing -mlir-timing-display=list ===-------------------------------------------------------------------------=== ... Pass execution timing report ... @@ -1146,7 +1146,7 @@ the most time, and can also be used to identify when analyses are being invalidated and recomputed. This is the default display mode. ```shell -$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing ===-------------------------------------------------------------------------=== ... Pass execution timing report ... 
@@ -1177,7 +1177,7 @@ perceived time, or clock time, whereas the `User Time` will display the total cpu time. ```shell -$ mlir-opt foo.mlir -pass-pipeline='func.func(cse,canonicalize)' -convert-func-to-llvm -mlir-timing +$ mlir-opt foo.mlir -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing ===-------------------------------------------------------------------------=== ... Pass execution timing report ... @@ -1328,7 +1328,7 @@ module { {-# external_resources: { mlir_reproducer: { - pipeline: "func.func(cse,canonicalize),inline", + pipeline: "builtin.module(func.func(cse,canonicalize),inline)", disable_threading: true, verify_each: true } @@ -1371,7 +1371,7 @@ module { {-# external_resources: { mlir_reproducer: { - pipeline: "func.func(canonicalize)", + pipeline: "builtin.module(func.func(canonicalize))", disable_threading: true, verify_each: true } diff --git a/mlir/include/mlir-c/Pass.h b/mlir/include/mlir-c/Pass.h index 704121a0cb096..721f1f28fe916 100644 --- a/mlir/include/mlir-c/Pass.h +++ b/mlir/include/mlir-c/Pass.h @@ -123,10 +123,12 @@ MLIR_CAPI_EXPORTED void mlirPrintPassPipeline(MlirOpPassManager passManager, MlirStringCallback callback, void *userData); -/// Parse a textual MLIR pass pipeline and add it to the provided OpPassManager. - +/// Parse a textual MLIR pass pipeline and assign it to the provided +/// OpPassManager. If parsing fails an error message is reported using the +/// provided callback. MLIR_CAPI_EXPORTED MlirLogicalResult -mlirParsePassPipeline(MlirOpPassManager passManager, MlirStringRef pipeline); +mlirParsePassPipeline(MlirOpPassManager passManager, MlirStringRef pipeline, + MlirStringCallback callback, void *userData); //===----------------------------------------------------------------------===// // External Pass API. 
diff --git a/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h b/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h new file mode 100644 index 0000000000000..f27f7bb5975ec --- /dev/null +++ b/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h @@ -0,0 +1,81 @@ +//===- AttrToLLVMConverter.h - Arith attributes conversion ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_CONVERSION_ARITHCOMMON_ATTRTOLLVMCONVERTER_H +#define MLIR_CONVERSION_ARITHCOMMON_ATTRTOLLVMCONVERTER_H + +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" + +//===----------------------------------------------------------------------===// +// Support for converting Arith FastMathFlags to LLVM FastmathFlags +//===----------------------------------------------------------------------===// + +namespace mlir { +namespace arith { +// Map arithmetic fastmath enum values to LLVMIR enum values. +LLVM::FastmathFlags +convertArithFastMathFlagsToLLVM(arith::FastMathFlags arithFMF); + +// Create an LLVM fastmath attribute from a given arithmetic fastmath attribute. +LLVM::FastmathFlagsAttr +convertArithFastMathAttrToLLVM(arith::FastMathFlagsAttr fmfAttr); + +// Attribute converter that populates a NamedAttrList by removing the fastmath +// attribute from the source operation attributes, and replacing it with an +// equivalent LLVM fastmath attribute. +template +class AttrConvertFastMathToLLVM { +public: + AttrConvertFastMathToLLVM(SourceOp srcOp) { + // Copy the source attributes. + convertedAttr = NamedAttrList{srcOp->getAttrs()}; + // Get the name of the arith fastmath attribute. 
+ llvm::StringRef arithFMFAttrName = SourceOp::getFastMathAttrName(); + // Remove the source fastmath attribute. + auto arithFMFAttr = + convertedAttr.erase(arithFMFAttrName) + .template dyn_cast_or_null(); + if (arithFMFAttr) { + llvm::StringRef targetAttrName = TargetOp::getFastmathAttrName(); + convertedAttr.set(targetAttrName, + convertArithFastMathAttrToLLVM(arithFMFAttr)); + } + } + + ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } + +private: + NamedAttrList convertedAttr; +}; + +// Attribute converter that populates a NamedAttrList by removing the fastmath +// attribute from the source operation attributes. This may be useful for +// target operations that do not require the fastmath attribute, or for targets +// that do not yet support the LLVM fastmath attribute. +template +class AttrDropFastMath { +public: + AttrDropFastMath(SourceOp srcOp) { + // Copy the source attributes. + convertedAttr = NamedAttrList{srcOp->getAttrs()}; + // Get the name of the arith fastmath attribute. + llvm::StringRef arithFMFAttrName = SourceOp::getFastMathAttrName(); + // Remove the source fastmath attribute. 
+ convertedAttr.erase(arithFMFAttrName); + } + + ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } + +private: + NamedAttrList convertedAttr; +}; +} // namespace arith +} // namespace mlir + +#endif // MLIR_CONVERSION_ARITHCOMMON_ATTRTOLLVMCONVERTER_H diff --git a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h index ac54ee6888136..2b7735da84666 100644 --- a/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h +++ b/mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h @@ -43,6 +43,10 @@ class MemRefDescriptor : public StructBuilder { static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, MemRefType type, Value memory); + static MemRefDescriptor fromStaticShape(OpBuilder &builder, Location loc, + LLVMTypeConverter &typeConverter, + MemRefType type, Value memory, + Value alignedMemory); /// Builds IR extracting the allocated pointer from the descriptor. 
Value allocatedPtr(OpBuilder &builder, Location loc); diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 66ac9eedf1bfb..5631d60025fa8 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -118,10 +118,10 @@ def ConvertArithToSPIRV : Pass<"convert-arith-to-spirv"> { let constructor = "mlir::arith::createConvertArithToSPIRVPass()"; let dependentDialects = ["spirv::SPIRVDialect"]; let options = [ - Option<"emulateNon32BitScalarTypes", "emulate-non-32-bit-scalar-types", + Option<"emulateLT32BitScalarTypes", "emulate-lt-32-bit-scalar-types", "bool", /*default=*/"true", - "Emulate non-32-bit scalar types with 32-bit ones if " - "missing native support">, + "Emulate narrower scalar types with 32-bit ones if not supported by " + "the target">, Option<"enableFastMath", "enable-fast-math", "bool", /*default=*/"false", "Enable fast math mode (assuming no NaN and infinity for floating " @@ -155,6 +155,7 @@ def ConvertAsyncToLLVM : Pass<"convert-async-to-llvm", "ModuleOp"> { "arith::ArithDialect", "async::AsyncDialect", "LLVM::LLVMDialect", + "func::FuncDialect", ]; } @@ -259,10 +260,10 @@ def ConvertControlFlowToSPIRV : Pass<"convert-cf-to-spirv"> { let constructor = "mlir::createConvertControlFlowToSPIRVPass()"; let dependentDialects = ["spirv::SPIRVDialect"]; let options = [ - Option<"emulateNon32BitScalarTypes", "emulate-non-32-bit-scalar-types", + Option<"emulateLT32BitScalarTypes", "emulate-lt-32-bit-scalar-types", "bool", /*default=*/"true", - "Emulate non-32-bit scalar types with 32-bit ones if " - "missing native support"> + "Emulate narrower scalar types with 32-bit ones if not supported by" + " the target"> ]; } @@ -320,10 +321,10 @@ def ConvertFuncToSPIRV : Pass<"convert-func-to-spirv"> { let constructor = "mlir::createConvertFuncToSPIRVPass()"; let dependentDialects = ["spirv::SPIRVDialect"]; let options = [ - Option<"emulateNon32BitScalarTypes", 
"emulate-non-32-bit-scalar-types", + Option<"emulateLT32BitScalarTypes", "emulate-lt-32-bit-scalar-types", "bool", /*default=*/"true", - "Emulate non-32-bit scalar types with 32-bit ones if " - "missing native support"> + "Emulate narrower scalar types with 32-bit ones if not supported by" + " the target"> ]; } @@ -815,10 +816,10 @@ def ConvertTensorToSPIRV : Pass<"convert-tensor-to-spirv"> { let constructor = "mlir::createConvertTensorToSPIRVPass()"; let dependentDialects = ["spirv::SPIRVDialect"]; let options = [ - Option<"emulateNon32BitScalarTypes", "emulate-non-32-bit-scalar-types", + Option<"emulateLT32BitScalarTypes", "emulate-lt-32-bit-scalar-types", "bool", /*default=*/"true", - "Emulate non-32-bit scalar types with 32-bit ones if " - "missing native support"> + "Emulate narrower scalar types with 32-bit ones if not supported by" + " the target"> ]; } diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td index 13d252cf056e5..78fd7bdf012f8 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td @@ -121,4 +121,9 @@ def FastMathFlags : I32BitEnumAttr< let printBitEnumPrimaryGroups = 1; } +def Arith_FastMathAttr : + EnumAttr { + let assemblyFormat = "`<` $value `>`"; +} + #endif // ARITH_BASE diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index f12a1a33f6912..3d6cef9705ebe 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -20,11 +20,6 @@ include "mlir/IR/BuiltinAttributeInterfaces.td" include "mlir/IR/OpAsmInterface.td" include "mlir/IR/EnumAttr.td" -def Arith_FastMathAttr : - EnumAttr { - let assemblyFormat = "`<` $value `>`"; -} - // Base class for Arith dialect ops. Ops in this dialect have no side // effects and can be applied element-wise to vectors and tensors. 
class Arith_Op traits = []> : @@ -71,7 +66,7 @@ class Arith_FloatUnaryOp traits = []> : Arguments<(ins FloatLike:$operand, DefaultValuedAttr:$fastmath)>, Results<(outs FloatLike:$result)> { - let assemblyFormat = [{ $operand custom($fastmath) + let assemblyFormat = [{ $operand (`fastmath` `` $fastmath^)? attr-dict `:` type($result) }]; } @@ -83,7 +78,7 @@ class Arith_FloatBinaryOp traits = []> : Arguments<(ins FloatLike:$lhs, FloatLike:$rhs, DefaultValuedAttr:$fastmath)>, Results<(outs FloatLike:$result)> { - let assemblyFormat = [{ $lhs `,` $rhs `` custom($fastmath) + let assemblyFormat = [{ $lhs `,` $rhs (`fastmath` `` $fastmath^)? attr-dict `:` type($result) }]; } diff --git a/mlir/include/mlir/Dialect/Async/IR/Async.h b/mlir/include/mlir/Dialect/Async/IR/Async.h index 0c60a3c06c131..585a231d24739 100644 --- a/mlir/include/mlir/Dialect/Async/IR/Async.h +++ b/mlir/include/mlir/Dialect/Async/IR/Async.h @@ -18,9 +18,11 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" -#include "mlir/IR/OpDefinition.h" +#include "mlir/IR/FunctionInterfaces.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" @@ -53,4 +55,19 @@ inline bool isRefCounted(Type type) { } // namespace async } // namespace mlir +namespace llvm { + +/// Allow stealing the low bits of async::FuncOp. 
+template <> +struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(mlir::async::FuncOp val) { + return const_cast(val.getAsOpaquePointer()); + } + static inline mlir::async::FuncOp getFromVoidPointer(void *p) { + return mlir::async::FuncOp::getFromOpaquePointer(p); + } + static constexpr int numLowBitsAvailable = 3; +}; +} // namespace llvm + #endif // MLIR_DIALECT_ASYNC_IR_ASYNC_H diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncDialect.td b/mlir/include/mlir/Dialect/Async/IR/AsyncDialect.td index e3b4db42204b8..eb1d76a180fe2 100644 --- a/mlir/include/mlir/Dialect/Async/IR/AsyncDialect.td +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncDialect.td @@ -32,9 +32,8 @@ def AsyncDialect : Dialect { let extraClassDeclaration = [{ /// The name of a unit attribute on funcs that are allowed to have a - /// blocking async.runtime.await ops. Only useful in combination with - /// 'eliminate-blocking-await-ops' option, which in absence of this - /// attribute might convert a func to a coroutine. + /// blocking async.runtime.await ops. In absence of this attribute the + /// asyncification pass might convert a func to a coroutine. 
static constexpr StringRef kAllowedToBlockAttrName = "async.allowed_to_block"; }]; diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td index d0584ef34b8e0..c8d3e2cc664b4 100644 --- a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td @@ -18,6 +18,11 @@ include "mlir/Dialect/Async/IR/AsyncTypes.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/Interfaces/CallInterfaces.td" +include "mlir/IR/SymbolInterfaces.td" +include "mlir/IR/FunctionInterfaces.td" +include "mlir/IR/OpAsmInterface.td" + //===----------------------------------------------------------------------===// // Async op definitions @@ -99,6 +104,189 @@ def Async_ExecuteOp : }]; } +def Async_FuncOp : Async_Op<"func", + [CallableOpInterface, FunctionOpInterface, + IsolatedFromAbove, OpAsmOpInterface, Symbol]> { + let summary = "async function operation"; + let description = [{ + An async function is like a normal function, but supports non-blocking + await. Internally, async function is lowered to the LLVM coroutine with + async runtime intrinsic. It can return an async token and/or async values. + The token represents the execution state of async function and can be used + when users want to express dependencies on some side effects, e.g., + the token becomes available once everything in the func body is executed. + + Example: + + ```mlir + // Async function can't return void, it always must be some async thing. + async.func @async.0() -> !async.token { + return + } + + // Function returns only async value. + async.func @async.1() -> !async.value { + %0 = arith.constant 42 : i32 + return %0 : i32 + } + + // Implicit token can be added to return types. 
+ async.func @async.2() -> !async.token, !async.value { + %0 = arith.constant 42 : i32 + return %0 : i32 + } + ``` + }]; + + let arguments = (ins SymbolNameAttr:$sym_name, + TypeAttrOf:$function_type, + OptionalAttr:$sym_visibility); + + let regions = (region AnyRegion:$body); + + let builders = [ + OpBuilder<(ins "StringRef":$name, "FunctionType":$type, + CArg<"ArrayRef", "{}">:$attrs, + CArg<"ArrayRef", "{}">:$argAttrs)> + ]; + + let extraClassDeclaration = [{ + //===------------------------------------------------------------------===// + // CallableOpInterface + //===------------------------------------------------------------------===// + + /// Returns the region on the current operation that is callable. This may + /// return null in the case of an external callable object, e.g. an external + /// function. + ::mlir::Region *getCallableRegion() { return isExternal() ? nullptr + : &getBody(); } + + /// Returns the results types that the callable region produces when + /// executed. + ArrayRef getCallableResults() { return getFunctionType() + .getResults(); } + + //===------------------------------------------------------------------===// + // FunctionOpInterface Methods + //===------------------------------------------------------------------===// + + /// Returns the argument types of this async function. + ArrayRef getArgumentTypes() { return getFunctionType().getInputs(); } + + /// Returns the result types of this async function. + ArrayRef getResultTypes() { return getFunctionType().getResults(); } + + /// Returns the number of results of this async function + unsigned getNumResults() {return getResultTypes().size();} + + /// Is the async func stateful + bool isStateful() { return isa(getFunctionType().getResult(0));} + + //===------------------------------------------------------------------===// + // OpAsmOpInterface Methods + //===------------------------------------------------------------------===// + + /// Allow the dialect prefix to be omitted. 
+ static StringRef getDefaultDialect() { return "async"; } + + //===------------------------------------------------------------------===// + // SymbolOpInterface Methods + //===------------------------------------------------------------------===// + + bool isDeclaration() { return isExternal(); } + }]; + let hasCustomAssemblyFormat = 1; + + let hasVerifier = 1; +} + +def Async_CallOp : Async_Op<"call", + [CallOpInterface, DeclareOpInterfaceMethods]> { + let summary = "async call operation"; + let description = [{ + The `async.call` operation represents a direct call to an async function + that is within the same symbol scope as the call. The operands and result + types of the call must match the specified async function type. The callee + is encoded as a symbol reference attribute named "callee". + + Example: + + ```mlir + %2 = async.call @my_add(%0, %1) : (f32, f32) -> !async.value + ``` + }]; + + let arguments = (ins FlatSymbolRefAttr:$callee, Variadic:$operands); + let results = (outs Variadic); + + let builders = [ + OpBuilder<(ins "FuncOp":$callee, CArg<"ValueRange", "{}">:$operands), [{ + $_state.addOperands(operands); + $_state.addAttribute("callee", SymbolRefAttr::get(callee)); + $_state.addTypes(callee.getFunctionType().getResults()); + }]>, + OpBuilder<(ins "SymbolRefAttr":$callee, "TypeRange":$results, + CArg<"ValueRange", "{}">:$operands), [{ + $_state.addOperands(operands); + $_state.addAttribute("callee", callee); + $_state.addTypes(results); + }]>, + OpBuilder<(ins "StringAttr":$callee, "TypeRange":$results, + CArg<"ValueRange", "{}">:$operands), [{ + build($_builder, $_state, SymbolRefAttr::get(callee), results, operands); + }]>, + OpBuilder<(ins "StringRef":$callee, "TypeRange":$results, + CArg<"ValueRange", "{}">:$operands), [{ + build($_builder, $_state, StringAttr::get($_builder.getContext(), callee), + results, operands); + }]> + ]; + + let extraClassDeclaration = [{ + FunctionType getCalleeType(); + + /// Get the argument operands to the 
called function. + operand_range getArgOperands() { + return {arg_operand_begin(), arg_operand_end()}; + } + + operand_iterator arg_operand_begin() { return operand_begin(); } + operand_iterator arg_operand_end() { return operand_end(); } + + /// Return the callee of this operation. + CallInterfaceCallable getCallableForCallee() { + return (*this)->getAttrOfType("callee"); + } + }]; + + let assemblyFormat = [{ + $callee `(` $operands `)` attr-dict `:` functional-type($operands, results) + }]; +} + +def Async_ReturnOp : Async_Op<"return", + [Pure, HasParent<"FuncOp">, ReturnLike, Terminator]> { + let summary = "Async function return operation"; + let description = [{ + The `async.return` operation is a special terminator for async functions. + + Example: + + ```mlir + async.func @foo() -> !async.token { + return + } + ``` + }]; + + let arguments = (ins Variadic:$operands); + + let builders = [OpBuilder<(ins), [{build($_builder, $_state, llvm::None);}]>]; + + let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; + let hasVerifier = 1; +} + def Async_YieldOp : Async_Op<"yield", [ HasParent<"ExecuteOp">, Pure, Terminator, diff --git a/mlir/include/mlir/Dialect/Async/Passes.td b/mlir/include/mlir/Dialect/Async/Passes.td index 16fb8626c0c0e..aed5b4ff7865a 100644 --- a/mlir/include/mlir/Dialect/Async/Passes.td +++ b/mlir/include/mlir/Dialect/Async/Passes.td @@ -44,13 +44,6 @@ def AsyncToAsyncRuntime : Pass<"async-to-async-runtime", "ModuleOp"> { let summary = "Lower high level async operations (e.g. async.execute) to the" "explicit async.runtime and async.coro operations"; let constructor = "mlir::createAsyncToAsyncRuntimePass()"; - let options = [ - // Temporary for bringup, should become the default. 
- Option<"eliminateBlockingAwaitOps", "eliminate-blocking-await-ops", "bool", - /*default=*/"false", - "Rewrite functions with blocking async.runtime.await as coroutines " - "with async.runtime.await_and_resume.">, - ]; let dependentDialects = ["async::AsyncDialect", "func::FuncDialect"]; } diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h index aa3f6423407c7..445430ac21a00 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h @@ -35,12 +35,25 @@ std::unique_ptr createBufferHoistingPass(); /// reallocations inside of loops. std::unique_ptr createBufferLoopHoistingPass(); +// Options struct for BufferResultsToOutParams pass. +// Note: defined only here, not in tablegen. +struct BufferResultsToOutParamsOptions { + // Filter function; returns true if the function should be converted. + // Defaults to true, i.e. all functions are converted. + llvm::function_ref filterFn = [](func::FuncOp *func) { + return true; + }; +}; + /// Creates a pass that converts memref function results to out-params. -std::unique_ptr createBufferResultsToOutParamsPass(); +std::unique_ptr createBufferResultsToOutParamsPass( + const BufferResultsToOutParamsOptions &options = {}); /// Replace buffers that are returned from a function with an out parameter. /// Also update all call sites. -LogicalResult promoteBufferResultsToOutParams(ModuleOp module); +LogicalResult +promoteBufferResultsToOutParams(ModuleOp module, + const BufferResultsToOutParamsOptions &options); /// Creates a pass that drops memref function results that are equivalent to a /// function argument. 
diff --git a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td index 0896f21954603..29f4c1eb151c5 100644 --- a/mlir/include/mlir/Dialect/Index/IR/IndexOps.td +++ b/mlir/include/mlir/Dialect/Index/IR/IndexOps.td @@ -280,6 +280,69 @@ def Index_MaxUOp : IndexBinaryOp<"maxu"> { }]; } +//===----------------------------------------------------------------------===// +// ShlOp +//===----------------------------------------------------------------------===// + +def Index_ShlOp : IndexBinaryOp<"shl"> { + let summary = "index shift left"; + let description = [{ + The `index.shl` operation shifts an index value to the left by a variable + amount. The low order bits are filled with zeroes. The RHS operand is always + treated as unsigned. If the RHS operand is equal to or greater than the + index bitwidth, the operation is undefined. + + Example: + + ```mlir + // c = a << b + %c = index.shl %a, %b + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// ShrSOp +//===----------------------------------------------------------------------===// + +def Index_ShrSOp : IndexBinaryOp<"shrs"> { + let summary = "signed index shift right"; + let description = [{ + The `index.shrs` operation shifts an index value to the right by a variable + amount. The LHS operand is treated as signed. The high order bits are filled + with copies of the most significant bit. If the RHS operand is equal to or + greater than the index bitwidth, the operation is undefined. 
+ + Example: + + ```mlir + // c = a >> b + %c = index.shrs %a, %b + ``` + }]; +} + +//===----------------------------------------------------------------------===// +// ShrUOp +//===----------------------------------------------------------------------===// + +def Index_ShrUOp : IndexBinaryOp<"shru"> { + let summary = "unsigned index shift right"; + let description = [{ + The `index.shru` operation shifts an index value to the right by a variable + amount. The LHS operand is treated as unsigned. The high order bits are + filled with zeroes. If the RHS operand is equal to or greater than the index + bitwidth, the operation is undefined. + + Example: + + ```mlir + // c = a >> b + %c = index.shru %a, %b + ``` + }]; +} + //===----------------------------------------------------------------------===// // CastSOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index b6b1f4c618ffd..182704defcc64 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -151,7 +151,7 @@ def LLVM_DIBasicTypeAttr : LLVM_Attr<"DIBasicType", "di_basic_type", //===----------------------------------------------------------------------===// def LLVM_DICompileUnitAttr : LLVM_Attr<"DICompileUnit", "di_compile_unit", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DIScopeAttr"> { let parameters = (ins LLVM_DILanguageParameter:$sourceLanguage, @@ -168,7 +168,7 @@ def LLVM_DICompileUnitAttr : LLVM_Attr<"DICompileUnit", "di_compile_unit", [ //===----------------------------------------------------------------------===// def LLVM_DICompositeTypeAttr : LLVM_Attr<"DICompositeType", "di_composite_type", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DITypeAttr"> { let parameters = (ins LLVM_DITagParameter:$tag, @@ -188,7 +188,7 @@ def LLVM_DICompositeTypeAttr : 
LLVM_Attr<"DICompositeType", "di_composite_type", //===----------------------------------------------------------------------===// def LLVM_DIDerivedTypeAttr : LLVM_Attr<"DIDerivedType", "di_derived_type", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DITypeAttr"> { let parameters = (ins LLVM_DITagParameter:$tag, @@ -220,7 +220,7 @@ def LLVM_DIFileAttr : LLVM_Attr<"DIFile", "di_file", /*traits=*/[], "DIScopeAttr //===----------------------------------------------------------------------===// def LLVM_DILexicalBlockAttr : LLVM_Attr<"DILexicalBlock", "di_lexical_block", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DIScopeAttr"> { let parameters = (ins "DIScopeAttr":$scope, @@ -244,7 +244,7 @@ def LLVM_DILexicalBlockAttr : LLVM_Attr<"DILexicalBlock", "di_lexical_block", [ //===----------------------------------------------------------------------===// def LLVM_DILexicalBlockFile : LLVM_Attr<"DILexicalBlockFile", "di_lexical_block_file", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DIScopeAttr"> { let parameters = (ins "DIScopeAttr":$scope, @@ -266,7 +266,7 @@ def LLVM_DILexicalBlockFile : LLVM_Attr<"DILexicalBlockFile", "di_lexical_block_ //===----------------------------------------------------------------------===// def LLVM_DILocalVariableAttr : LLVM_Attr<"DILocalVariable", "di_local_variable", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DINodeAttr"> { let parameters = (ins "DIScopeAttr":$scope, @@ -296,7 +296,7 @@ def LLVM_DILocalVariableAttr : LLVM_Attr<"DILocalVariable", "di_local_variable", //===----------------------------------------------------------------------===// def LLVM_DISubprogramAttr : LLVM_Attr<"DISubprogram", "di_subprogram", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DIScopeAttr"> { let parameters = (ins "DICompileUnitAttr":$compileUnit, @@ -334,9 +334,9 @@ def LLVM_DISubrangeAttr : LLVM_Attr<"DISubrange", "di_subrange", /*traits=*/[], "DINodeAttr"> { let 
parameters = (ins "IntegerAttr":$count, - "IntegerAttr":$lowerBound, - "IntegerAttr":$upperBound, - "IntegerAttr":$stride + OptionalParameter<"IntegerAttr">:$lowerBound, + OptionalParameter<"IntegerAttr">:$upperBound, + OptionalParameter<"IntegerAttr">:$stride ); let assemblyFormat = "`<` struct(params) `>`"; } @@ -346,7 +346,7 @@ def LLVM_DISubrangeAttr : LLVM_Attr<"DISubrange", "di_subrange", /*traits=*/[], //===----------------------------------------------------------------------===// def LLVM_DISubroutineTypeAttr : LLVM_Attr<"DISubroutineType", "di_subroutine_type", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ], "DITypeAttr"> { let parameters = (ins LLVM_DICallingConventionParameter:$callingConvention, diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index ca902f9d848cf..bb2668790dbfb 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -2,6 +2,7 @@ #define LLVM_INTRINSIC_OPS include "mlir/IR/OpBase.td" +include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" include "mlir/Interfaces/InferTypeOpInterface.td" @@ -12,38 +13,59 @@ include "mlir/Interfaces/InferTypeOpInterface.td" // "intr." to avoid potential name clashes. 
class LLVM_UnaryIntrOpBase traits = []> : + list traits = [], + dag addAttrs = (ins)> : LLVM_OneResultIntrOp { - let arguments = (ins LLVM_ScalarOrVectorOf:$in); + dag args = (ins LLVM_ScalarOrVectorOf:$in); + let arguments = !con(args, addAttrs); + let assemblyFormat = "`(` operands `)` custom(attr-dict) `:` " + "functional-type(operands, results)"; } class LLVM_UnaryIntrOpI traits = []> : LLVM_UnaryIntrOpBase; class LLVM_UnaryIntrOpF traits = []> : - LLVM_UnaryIntrOpBase; + LLVM_UnaryIntrOpBase], + traits), + (ins DefaultValuedAttr:$fastmathFlags)>; class LLVM_BinarySameArgsIntrOpBase traits = []> : + list traits = [], + dag addAttrs = (ins)> : LLVM_OneResultIntrOp { - let arguments = (ins LLVM_ScalarOrVectorOf:$a, - LLVM_ScalarOrVectorOf:$b); + dag args = (ins LLVM_ScalarOrVectorOf:$a, + LLVM_ScalarOrVectorOf:$b); + let arguments = !con(args, addAttrs); + let assemblyFormat = "`(` operands `)` custom(attr-dict) `:` " + "functional-type(operands, results)"; } class LLVM_BinarySameArgsIntrOpI traits = []> : LLVM_BinarySameArgsIntrOpBase; class LLVM_BinarySameArgsIntrOpF traits = []> : - LLVM_BinarySameArgsIntrOpBase; + LLVM_BinarySameArgsIntrOpBase], + traits), + (ins DefaultValuedAttr:$fastmathFlags)>; class LLVM_TernarySameArgsIntrOpF traits = []> : LLVM_OneResultIntrOp { + !listconcat([DeclareOpInterfaceMethods, + Pure, SameOperandsAndResultType], traits)> { let arguments = (ins LLVM_ScalarOrVectorOf:$a, LLVM_ScalarOrVectorOf:$b, - LLVM_ScalarOrVectorOf:$c); + LLVM_ScalarOrVectorOf:$c, + DefaultValuedAttr:$fastmathFlags); + let assemblyFormat = "`(` operands `)` custom(attr-dict) `:` " + "functional-type(operands, results)"; } class LLVM_CountZerosIntrOp traits = []> : @@ -83,9 +105,14 @@ def LLVM_RoundOp : LLVM_UnaryIntrOpF<"round">; def LLVM_FTruncOp : LLVM_UnaryIntrOpF<"trunc">; def LLVM_SqrtOp : LLVM_UnaryIntrOpF<"sqrt">; def LLVM_PowOp : LLVM_BinarySameArgsIntrOpF<"pow">; -def LLVM_PowIOp : LLVM_OneResultIntrOp<"powi"> { - let arguments = (ins 
LLVM_ScalarOrVectorOf:$val, - AnySignlessInteger:$power); +def LLVM_PowIOp : LLVM_OneResultIntrOp<"powi", [], [0,1], + [DeclareOpInterfaceMethods, Pure]> { + let arguments = + (ins LLVM_ScalarOrVectorOf:$val, + AnySignlessInteger:$power, + DefaultValuedAttr:$fastmathFlags); + let assemblyFormat = "`(` operands `)` custom(attr-dict) `:` " + "functional-type(operands, results)"; } def LLVM_BitReverseOp : LLVM_UnaryIntrOpI<"bitreverse">; def LLVM_CountLeadingZerosOp : LLVM_CountZerosIntrOp<"ctlz">; @@ -680,6 +707,25 @@ def LLVM_vector_extract }]; } +//===--------------------------------------------------------------------===// +// CallIntrinsicOp +//===--------------------------------------------------------------------===// +def LLVM_CallIntrinsicOp : LLVM_Op<"call_intrinsic", [Pure]> { + let summary = "Call to an LLVM intrinsic function."; + let description = [{ + Call the specified llvm intrinsic. If the intrinsic is overloaded, use + the MLIR function type of this op to determine which intrinsic to call. + }]; + let arguments = (ins StrAttr:$intrin, Variadic:$args); + let results = (outs Variadic:$results); + let llvmBuilder = [{ + return convertCallLLVMIntrinsicOp(op, builder, moduleTranslation); + }]; + let assemblyFormat = [{ + $intrin `(` $args `)` `:` functional-type($args, $results) attr-dict + }]; +} + // // LLVM Vector Predication operations. 
// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index ec6d565b67b00..cf39f2cb2d49c 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -49,6 +49,8 @@ def LLVM_Dialect : Dialect { static StringRef getStructRetAttrName() { return "llvm.sret"; } static StringRef getInAllocaAttrName() { return "llvm.inalloca"; } static StringRef getNoUndefAttrName() { return "llvm.noundef"; } + static StringRef getSExtAttrName() { return "llvm.signext"; } + static StringRef getZExtAttrName() { return "llvm.zeroext"; } /// Verifies if the attribute is a well-formed value for "llvm.struct_attrs" static LogicalResult verifyStructAttr( diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index b067a1ddd1e61..9866620fd4892 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -440,7 +440,9 @@ def TransposeOp : LinalgStructuredBase_Op<"transpose", [ static std::function)> - getRegionBuilder(); + getRegionBuilder() { + return nullptr; + } static void createRegion(::mlir::OpBuilder &opBuilder, ::mlir::OperationState & odsState); @@ -450,6 +452,79 @@ def TransposeOp : LinalgStructuredBase_Op<"transpose", [ let hasVerifier = 1; } + +//===----------------------------------------------------------------------===// +// Broadcast op. +//===----------------------------------------------------------------------===// + +def BroadcastOp : LinalgStructuredBase_Op<"broadcast", [ + DeclareOpInterfaceMethods, + SameVariadicOperandSize, + SingleBlockImplicitTerminator<"YieldOp">]> { + let summary = "Static broadcast operator"; + let description = [{ + Broadcast the input into the given shape by adding dimensions. + + Each index in `dimensions` attribute maps input dimension into the + corresponding target dimension. 
The length of the `dimensions` list should + match the `input` rank and dimensions should be in sorted order. There is no + ambiguity at compile-time about shape information. + + Example: + ``` + %bcast = linalg.broadcast + ins(%input:tensor<16xf32>) + inits(%init:tensor<16x64xf32>) + dimensions = [0] + ``` + }]; + + let arguments = (ins + // Input arg + TensorOrMemref:$input, + // Output arg + TensorOrMemref:$init, + + DenseI64ArrayAttr:$dimensions + ); + let results = (outs Variadic:$result); + let regions = (region SizedRegion<1>:$region); + + let skipDefaultBuilders = 1; + let builders = [ + OpBuilder<(ins "Value":$input, "Value":$init, + "DenseI64ArrayAttr":$dimensions, CArg<"ArrayRef", + "{}">:$attributes)>, + OpBuilder<(ins "Value":$input, "Value":$init, + "ArrayRef":$dimensions, CArg<"ArrayRef", + "{}">:$attributes)>, + ]; + + let extraClassDeclaration = structuredOpsBaseDecls # [{ + // Declare functions necessary for LinalgStructuredInterface. + SmallVector getIteratorTypesArray(); + ArrayAttr getIndexingMaps(); + std::string getLibraryCallName() { + return "op_has_no_registered_library_name"; + } + + // Implement functions necessary for DestinationStyleOpInterface. + std::pair getDpsInitsPositionRange() { + int64_t getNumOperands = this->getNumOperands(); + return {getNumOperands - 1, getNumOperands}; + } + + static std::function)> + getRegionBuilder() { + return nullptr; + } + }]; + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // Named Linalg ops, implemented as a declarative configurations of generic ops. 
//===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h index f7952db7e2a23..2583875e2d0ea 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.h @@ -20,6 +20,12 @@ namespace linalg { class GenericOp; class LinalgOp; } // namespace linalg + +namespace transform { +// Types needed for builders. +struct TileSizesSpec {}; +struct NumThreadsSpec {}; +} // namespace transform } // namespace mlir //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 5c304f5efb6ea..347def6c9d1b5 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -112,6 +112,10 @@ def FuseIntoContainingOp : [TransformMappingAlloc, TransformMappingWrite]>:$fused_op); let assemblyFormat = "$producer_op `into` $containing_op attr-dict"; + + let builders = [ + OpBuilder<(ins "Value":$producerOp, "Value":$containingOp)> + ]; } def GeneralizeOp : Op":$opNames)> + ]; + let assemblyFormat = [{ (`ops` `{` $ops^ `}`)? (`interface` `{` $interface^ `}`)? 
@@ -600,6 +608,103 @@ def SplitReductionOp : Op:$innerParallel, + CArg<"bool", "false">:$useScalingAlgorithm, + CArg<"bool", "false">:$useAlloc)> + ]; + + let extraClassDeclaration = [{ + ::mlir::DiagnosedSilenceableFailure applyToOne( + ::mlir::linalg::LinalgOp target, + ::llvm::SmallVectorImpl<::mlir::Operation *> &results, + ::mlir::transform::TransformState &state); + }]; +} + + +def TileReductionUsingScfOp : Op { + let description = [{ + Indicates that the given `target` op should be transformed with the + `tileReduction` transformation with the tile size provided as attribute. + + This transformation tiles the `target` along the reduction dimensions. It + creates a tensor initialized with the identity value. Then it creates nested + loops with a parallel version of `target` op inside. The parallel op + dimensions are less or equal to the tile size passed by user. + After the loop a merge operation is created to do a final reduction with the + partial reductions. + The initial tensor always uses the tile size dimension. This may overallocate + if the tile size is greater than the reduction dimension. + + #### Return modes + + This 3 returned handles point to: + - the fill op used to initialize the neutral element, + - the parallel tiled op and + - the result-combining op. 
+ + #### Example: + + ``` + %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%arg0 : tensor) + outs(%out : tensor) { + ^bb0(%arg7: f32, %arg9: f32): + %1 = arith.addf %arg7, %arg9 : f32 + linalg.yield %1 : f32 + } -> tensor + return %red : tensor + ``` + + is transformed into: + + ``` + %0 = tensor.empty(%dim_1) : tensor + %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %2 = scf.for %arg2 = %c0 to %dim_0 step %c5 iter_args(%arg3 = %1) -> (tensor) { + %extracted_slice = tensor.extract_slice %1[0, 0] [%dim, 5] [1, 1] : tensor to tensor + %extracted_slice_2 = tensor.extract_slice %arg0[0, %arg2] [%dim, 5] [1, 1] : tensor to tensor + %4 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0, d1)>], + iterator_types = ["parallel", "parallel"]} + ins(%extracted_slice_2 : tensor) + outs(%extracted_slice : tensor) { + ^bb0(%in: f32, %out: f32): + %5 = arith.addf %in, %out : f32 + linalg.yield %5 : f32 + } -> tensor + %dim_3 = tensor.dim %1, %c0 : tensor + %inserted_slice = tensor.insert_slice %4 into %arg3[0, 0] [%dim_3, 5] [1, 1] : tensor into tensor + scf.yield %inserted_slice : tensor + } + %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%2 : tensor) + outs(%arg1 : tensor) { + ^bb0(%in: f32, %out: f32): + %4 = arith.addf %in, %out : f32 + linalg.yield %4 : f32 + } -> tensor + ``` + }]; + + let arguments = (ins PDL_Operation:$target, + DefaultValuedAttr:$tile_sizes); + let results = (outs PDL_Operation:$fill_op, + PDL_Operation:$split_linalg_op, + PDL_Operation:$combining_linalg_op); + + let assemblyFormat = "$target attr-dict"; + let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::linalg::LinalgOp target, @@ -730,6 +835,30 @@ def TileToForeachThreadOp : 
OptionalAttr:$thread_dim_mapping); let results = (outs PDL_Operation:$foreach_thread_op, PDL_Operation:$tiled_op); + + let builders = [ + OpBuilder<(ins "Value":$target, + "ArrayRef":$staticTileSizes, + CArg<"::mlir::transform::TileSizesSpec", + "::mlir::transform::TileSizesSpec()">, + CArg<"ArrayRef", "{}">:$threadDimMapping)>, + OpBuilder<(ins "Value":$target, + "ArrayRef":$mixedTileSizes, + CArg<"::mlir::transform::TileSizesSpec", + "::mlir::transform::TileSizesSpec()">, + CArg<"ArrayRef", "{}">:$threadDimMapping)>, + OpBuilder<(ins "Value":$target, + "ArrayRef":$staticNumThreads, + CArg<"::mlir::transform::NumThreadsSpec", + "::mlir::transform::NumThreadsSpec()">, + CArg<"ArrayRef", "{}">:$threadDimMapping)>, + OpBuilder<(ins "Value":$target, + "ArrayRef":$mixedNumThreads, + CArg<"::mlir::transform::NumThreadsSpec", + "::mlir::transform::NumThreadsSpec()">, + CArg<"ArrayRef", "{}">:$threadDimMapping)>, + ]; + let assemblyFormat = [{ $target oilist( `num_threads` custom($num_threads, @@ -855,6 +984,10 @@ def VectorizeOp : Op:$vectorizePadding)> + ]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::Operation *target, diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 6a10d4332e7eb..5fc7938e0dd2f 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -137,6 +137,10 @@ GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to); Optional> getReassociationMapForFoldingUnitDims(ArrayRef mixedSizes); +/// Return the identity numeric value associated to the give op. Return +/// llvm::None if there is no known neutral element. 
+Optional getNeutralElement(Operation *op); + //===----------------------------------------------------------------------===// // Fusion / Tiling utilities //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt b/mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt index b737819b8d8aa..0f98a1efbcb79 100644 --- a/mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Math/IR/CMakeLists.txt @@ -1,2 +1,2 @@ add_mlir_dialect(MathOps math) -add_mlir_doc(MathOps MathOps Dialects/ -gen-dialect-doc) +add_mlir_doc(MathOps MathOps Dialects/ -gen-dialect-doc -dialect math) diff --git a/mlir/include/mlir/Dialect/Math/IR/Math.h b/mlir/include/mlir/Dialect/Math/IR/Math.h index 6af358bf57b37..98416d1c9abdf 100644 --- a/mlir/include/mlir/Dialect/Math/IR/Math.h +++ b/mlir/include/mlir/Dialect/Math/IR/Math.h @@ -9,6 +9,7 @@ #ifndef MLIR_DIALECT_MATH_IR_MATH_H_ #define MLIR_DIALECT_MATH_IR_MATH_H_ +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" diff --git a/mlir/include/mlir/Dialect/Math/IR/MathOps.td b/mlir/include/mlir/Dialect/Math/IR/MathOps.td index 99e209000c0f5..a5b28bd0891c5 100644 --- a/mlir/include/mlir/Dialect/Math/IR/MathOps.td +++ b/mlir/include/mlir/Dialect/Math/IR/MathOps.td @@ -9,6 +9,8 @@ #ifndef MATH_OPS #define MATH_OPS +include "mlir/Dialect/Arith/IR/ArithBase.td" +include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" include "mlir/Dialect/Math/IR/MathBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/VectorInterfaces.td" @@ -36,11 +38,16 @@ class Math_IntegerUnaryOp traits = []> : // operand and result of the same type. This type can be a floating point type, // vector or tensor thereof. 
class Math_FloatUnaryOp traits = []> : - Math_Op { - let arguments = (ins FloatLike:$operand); + Math_Op]> { + let arguments = (ins FloatLike:$operand, + DefaultValuedAttr:$fastmath); let results = (outs FloatLike:$result); - let assemblyFormat = "$operand attr-dict `:` type($result)"; + let assemblyFormat = [{ $operand (`fastmath` `` $fastmath^)? + attr-dict `:` type($result) }]; } // Base class for binary math operations on integer types. Require two @@ -58,22 +65,32 @@ class Math_IntegerBinaryOp traits = []> : // operands and one result of the same type. This type can be a floating point // type, vector or tensor thereof. class Math_FloatBinaryOp traits = []> : - Math_Op { - let arguments = (ins FloatLike:$lhs, FloatLike:$rhs); + Math_Op]> { + let arguments = (ins FloatLike:$lhs, FloatLike:$rhs, + DefaultValuedAttr:$fastmath); let results = (outs FloatLike:$result); - let assemblyFormat = "$lhs `,` $rhs attr-dict `:` type($result)"; + let assemblyFormat = [{ $lhs `,` $rhs (`fastmath` `` $fastmath^)? + attr-dict `:` type($result) }]; } // Base class for floating point ternary operations. Require three operands and // one result of the same type. This type can be a floating point type, vector // or tensor thereof. class Math_FloatTernaryOp traits = []> : - Math_Op { - let arguments = (ins FloatLike:$a, FloatLike:$b, FloatLike:$c); + Math_Op]> { + let arguments = (ins FloatLike:$a, FloatLike:$b, FloatLike:$c, + DefaultValuedAttr:$fastmath); let results = (outs FloatLike:$result); - let assemblyFormat = "$a `,` $b `,` $c attr-dict `:` type($result)"; + let assemblyFormat = [{ $a `,` $b `,` $c (`fastmath` `` $fastmath^)? 
+ attr-dict `:` type($result) }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td index 1f1b118087f90..cfc9d2a773087 100644 --- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td +++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td @@ -483,7 +483,7 @@ def MemRef_CastOp : MemRef_Op<"cast", [ // CopyOp //===----------------------------------------------------------------------===// -def CopyOp : MemRef_Op<"copy", [CopyOpInterface, SameOperandsElementType, +def CopyOp : MemRef_Op<"copy", [CopyOpInterface, SameOperandsElementType, SameOperandsShape]> { let description = [{ @@ -809,10 +809,10 @@ def MemRef_DmaWaitOp : MemRef_Op<"dma_wait"> { // ExtractAlignedPointerAsIndexOp //===----------------------------------------------------------------------===// -def MemRef_ExtractAlignedPointerAsIndexOp : +def MemRef_ExtractAlignedPointerAsIndexOp : MemRef_Op<"extract_aligned_pointer_as_index", [ DeclareOpInterfaceMethods, - Pure, + Pure, SameVariadicResultSize]> { let summary = "Extracts a memref's underlying aligned pointer as an index"; let description = [{ @@ -852,7 +852,7 @@ def MemRef_ExtractAlignedPointerAsIndexOp : def MemRef_ExtractStridedMetadataOp : MemRef_Op<"extract_strided_metadata", [ DeclareOpInterfaceMethods, - Pure, + Pure, SameVariadicResultSize, DeclareOpInterfaceMethods]> { let summary = "Extracts a buffer base with offset and strides"; @@ -866,18 +866,18 @@ def MemRef_ExtractStridedMetadataOp : MemRef_Op<"extract_strided_metadata", [ This operation is also useful for completeness to the existing memref.dim op. While accessing strides, offsets and the base pointer independently is not - available, this is useful for composing with its natural complement op: + available, this is useful for composing with its natural complement op: `memref.reinterpret_cast`. 
Intended Use Cases: The main use case is to expose the logic for manipulate memref metadata at a - higher level than the LLVM dialect. + higher level than the LLVM dialect. This makes lowering more progressive and brings the following benefits: - not all users of MLIR want to lower to LLVM and the information to e.g. lower to library calls---like libxsmm---or to SPIR-V was not available. - - foldings and canonicalizations can happen at a higher level in MLIR: - before this op existed, lowering to LLVM would create large amounts of + - foldings and canonicalizations can happen at a higher level in MLIR: + before this op existed, lowering to LLVM would create large amounts of LLVMIR. Even when LLVM does a good job at folding the low-level IR from a performance perspective, it is unnecessarily opaque and inefficient to send unkempt IR to LLVM. @@ -885,11 +885,11 @@ def MemRef_ExtractStridedMetadataOp : MemRef_Op<"extract_strided_metadata", [ Example: ```mlir - %base, %offset, %sizes:2, %strides:2 = - memref.extract_strided_metadata %memref : + %base, %offset, %sizes:2, %strides:2 = + memref.extract_strided_metadata %memref : memref<10x?xf32>, index, index, index, index, index - // After folding, the type of %m2 can be memref<10x?xf32> and further + // After folding, the type of %m2 can be memref<10x?xf32> and further // folded to %memref. 
%m2 = memref.reinterpret_cast %base to offset: [%offset], @@ -1213,10 +1213,10 @@ def MemRef_PrefetchOp : MemRef_Op<"prefetch"> { def MemRef_ReinterpretCastOp : MemRef_OpWithOffsetSizesAndStrides<"reinterpret_cast", [ DeclareOpInterfaceMethods, - AttrSizedOperandSegments, + AttrSizedOperandSegments, MemRefsNormalizable, - Pure, - OffsetSizeAndStrideOpInterface, + Pure, + OffsetSizeAndStrideOpInterface, ViewLikeOpInterface ]> { let summary = "memref reinterpret cast operation"; @@ -2008,7 +2008,7 @@ def MemRef_TransposeOp : MemRef_Op<"transpose", [ def MemRef_ViewOp : MemRef_Op<"view", [ DeclareOpInterfaceMethods, - DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, Pure]> { let summary = "memref view operation"; let description = [{ diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td index 138ffc896cb2a..db4ee53252fb3 100644 --- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td +++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td @@ -98,10 +98,24 @@ def NVGPU_LdMatrixOp : NVGPU_Op<"ldmatrix", [ let hasVerifier = 1; } -def NVGPU_MmaSyncOp : NVGPU_Op<"mma.sync", [ - Pure, - PredOpTrait<"matrixA and matrixB have same element type", - TCopVTEtIsSameAs<0, 1>>]> { +class NVGPU_MmaSyncOp : + NVGPU_Op>]> { + code extraBaseClassDeclaration = [{ + std::array getMmaShapeAsArray() { + ArrayAttr mmaShape = this->getMmaShape(); + assert(mmaShape.size() == 3 && "mmaShape should be three integers"); + return {mmaShape[0].cast().getInt(), + mmaShape[1].cast().getInt(), + mmaShape[2].cast().getInt()}; + } + }]; + + let hasVerifier = 1; +} + +def NVGPU_MmaSyncOp : NVGPU_MmaSyncOp<"mma.sync"> { let description = [{ The `nvgpu.mma.sync` op represents the warp-level matrix-multiply-and- accumulate (mma) operation that is compatible with `nvvm.mma.sync`. 
@@ -143,9 +157,63 @@ def NVGPU_MmaSyncOp : NVGPU_Op<"mma.sync", [ `:` `(` type($matrixA) `,` type($matrixB) `,` type($matrixC) `)` `->` type($res) }]; - let hasVerifier = 1; + let extraClassDeclaration = extraBaseClassDeclaration; } +def NVGPU_MmaSparseSyncMetadataType : FixedVectorOfLengthAndType<[2], [I16]>, + BuildableType<"::mlir::VectorType::get(" + "{2},$_builder.getI16Type())">; + +def NVGPU_MmaSparseSyncOp : NVGPU_MmaSyncOp<"mma.sp.sync"> { + let description = [{ + The `nvgu.mma.sp.sync` operation performs a warp-distributed MMA operation + where operand A is "structured sparse". In this case, the `matrixA` operand + represents the (warp-distributed) non-zero values of operand A, and the + `sparse_metadata` operand provides the indices. + + The full description of the sparsity storage format and distribution scheme is + described in the PTX docs. This operation is meant to follow the semantic + described in the PTX documentation here: + https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-for-sparse-mma + + The way the indices are distributed among the threads in a warp is controlled + by the optional `sparsity_selector` operand, which is `0` by default. For + more information, please consult the PTX documentation linked above. 
+ + Example (targetingthe f16 16x8x32 `mma.sp` PTX instruction): + + ```mlir + nvgpu.mma.sp.sync (%a, %b, %c) metadata (%meta) {mmaShape = [16, 8, 32]} : + (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + ``` + }]; + + let arguments = (ins AnyVector:$matrixA, + AnyVector:$matrixB, + AnyVector:$matrixC, + NVGPU_MmaSparseSyncMetadataType:$sparseMetadata, + I64ArrayAttr:$mmaShape, + DefaultValuedAttr:$sparsitySelector, + OptionalAttr:$tf32Enabled + ); + + let results = (outs AnyVector:$res); + + let builders = [ + OpBuilder<(ins "Value":$matrixA, + "Value":$matrixB, + "Value":$matrixC, + "Value":$sparseMetadata, + "ArrayRef":$mmaShape)> + ]; + + let assemblyFormat = [{ + `(` $matrixA`,` $matrixB`,` $matrixC `)` `metadata` `(` $sparseMetadata `)` attr-dict + `:` `(` type($matrixA) `,` type($matrixB) `,` type($matrixC) `)` `->` type($res) + }]; + + let extraClassDeclaration = extraBaseClassDeclaration; +} def NVGPU_DeviceAsyncCopyOp : NVGPU_Op<"device_async_copy", [ AttrSizedOperandSegments]> { diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h index 6cdef2512f607..151993cc3d9a4 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h @@ -62,8 +62,10 @@ struct SCFTilingOptions { /// Transformation information returned after tiling. struct SCFTilingResult { - /// The tiled operation generated. - Operation *tiledOp; + /// Tiled operations that are generated during tiling. The order does not + /// matter except the last op. The replacements are expected to be the results + /// of the last op. + SmallVector tiledOps; /// The `scf.for` operations that iterate over the tiles. SmallVector loops; /// Values to use as replacements for the untiled op. 
Is the same size as the @@ -136,6 +138,46 @@ tileConsumerAndFuseProducerGreedilyUsingSCFForOp( FailureOr> lowerToLoopsUsingSCFForOp(RewriterBase &rewriter, TilingInterface op); +/// Transformation information returned after reduction tiling. +struct SCFReductionTilingResult { + /// The partial reduction tiled op generated. + Operation *parallelTiledOp; + /// The final reduction operation merging all the partial reductions. + Operation *mergeOp; + /// Initial op + Operation *initialOp; + /// The `scf.for` operations that iterate over the tiles. + SmallVector loops; +}; + +/// Method to tile a reduction and generate a parallel op within a serial loop. +/// Each of the partial reductions are calculated in parallel. Then after the +/// loop all the partial reduction are merged into a final reduction. +/// For example for the following sequence +/// +/// ```mlir +/// %0 = linalg.generic %in ["parallel", "reduction"] +/// : tensor<7x9xf32> -> tensor<7xf32> +/// ``` +/// +/// into: +/// +/// ```mlir +/// %0 = linalg.fill ... : tensor<7x4xf32> +/// %1 = scf.for ... 
iter_args(%arg0 = %0) +/// %2 = tensor.extract_slice %arg0 : tensor<7x4xf32> -> tensor<7x?xf32> +/// %3 = tensor.extract_slice %in : tensor<7x9xf32> -> tensor<7x?xf32> +/// %4 = linalg.generic %2, %3 ["parallel", "parallel"] +/// : tensor<7x?xf32> -> tensor<7x?xf32> +/// %5 = tensor.insert_slice %3, %0[0, 0] : tensor<7x4xf32> +/// } +/// %6 = linalg.generic %1 ["parallel", "reduction"] +/// : tensor<7x4xf32> -> tensor<7xf32> +/// ``` +FailureOr +tileReductionUsingScf(PatternRewriter &b, PartialReductionOpInterface op, + ArrayRef tileSize); + } // namespace scf } // namespace mlir diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td index c985c6e94e19e..8975fa01df403 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCastOps.td @@ -88,7 +88,7 @@ def SPIRV_BitcastOp : SPIRV_Op<"Bitcast", [Pure]> { let assemblyFormat = [{ $operand attr-dict `:` type($operand) `to` type($result) }]; - let hasCanonicalizer = 1; + let hasFolder = 1; } // ----- diff --git a/mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h b/mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h index 9b480f6cc9e3a..7d362526cc22f 100644 --- a/mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h +++ b/mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h @@ -30,21 +30,21 @@ struct SPIRVConversionOptions { /// The number of bits to store a boolean value. unsigned boolNumBits{8}; - /// Whether to emulate non-32-bit scalar types with 32-bit scalar types if - /// no native support. + /// Whether to emulate narrower scalar types with 32-bit scalar types if not + /// supported by the target. /// /// Non-32-bit scalar types require special hardware support that may not /// exist on all GPUs. This is reflected in SPIR-V as that non-32-bit scalar /// types require special capabilities or extensions. 
This option controls - /// whether to use 32-bit types to emulate, if a scalar type of a certain - /// bitwidth is not supported in the target environment. This requires the - /// runtime to also feed in data with a matched bitwidth and layout for - /// interface types. The runtime can do that by inspecting the SPIR-V - /// module. + /// whether to use 32-bit types to emulate < 32-bits-wide scalars, if a scalar + /// type of a certain bitwidth is not supported in the target environment. + /// This requires the runtime to also feed in data with a matched bitwidth and + /// layout for interface types. The runtime can do that by inspecting the + /// SPIR-V module. /// /// If the original scalar type has less than 32-bit, a multiple of its /// values will be packed into one 32-bit value to be memory efficient. - bool emulateNon32BitScalarTypes{true}; + bool emulateLT32BitScalarTypes{true}; /// Use 64-bit integers to convert index types. bool use64bitIndex{false}; diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 8b8dc46297971..52a6aff752792 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -518,6 +518,45 @@ def SparseTensor_SortOp : SparseTensor_Op<"sort", [AttrSizedOperandSegments]>, let hasVerifier = 1; } +def SparseTensor_SortCooOp : SparseTensor_Op<"sort_coo">, + Arguments<(ins Index:$n, StridedMemRefRankOf<[AnyInteger, Index], [1]>:$xy, + Variadic>:$ys, + OptionalAttr:$nx, OptionalAttr:$ny, + UnitAttr:$stable)> { + let summary = "Sorts the arrays in xs and ys lexicographically on the " + "integral values found in the xs list"; + let description = [{ + Sparse_tensor.sort_coo is similar to sparse_tensor.sort, except that all the + `xs` values and some `ys` values are put in the linear buffer `xy`. The + optional index attribute `nx` provides the number of `xs` values in `xy`. 
+ When `ns` is not explicitly specified, its value is 1. The optional index + attribute `ny` provides the number of `ys` values in `xy`. When `ny` is not + explicitly specified, its value is 0. This instruction supports the TACO + COO style storage format for better sorting performance. + + The buffer xy should have a dimension not less than n * (nx + ny) while the + buffers in `ys` should have a dimension not less than `n`. The behavior of + the operator is undefined if this condition is not met. + + Example: + + ```mlir + sparse_tensor.sort_coo %n, %x { nx = 2 : index} + : memref + ``` + + ```mlir + sparse_tensor.sort %n, %xy jointly %y1 { nx = 2 : index, ny = 2 : index} + : memref jointly memref + ``` + }]; + + let assemblyFormat = "(`stable` $stable^)? $n" + "`,`$xy (`jointly` $ys^)? attr-dict" + "`:` type($xy) (`jointly` type($ys)^)?"; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // Sparse Tensor Syntax Operations. //===----------------------------------------------------------------------===// @@ -857,21 +896,44 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator]>, def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", [SingleBlockImplicitTerminator<"YieldOp">]>, - Arguments<(ins AnyTensor:$tensor)>{ + Arguments<(ins AnyTensor:$tensor, + Variadic:$initArgs)>, + Results<(outs Variadic:$results)> { let summary = "Iterates over elements in a tensor"; let description = [{ Iterates over stored elements in a tensor (which are typically, but not always, non-zero for sparse tensors) and executes the block. - For an input tensor with rank n, the block must take n + 1 arguments. The - first n arguments must be Index type, together indicating the current coordinates - of the element being visited. The last argument must have the same type as the + For an input tensor with rank n, the block must take n + 1 (and additional loop + carried variables as described below) arguments. 
The first n arguments must be + Index type, together indicating the current coordinates of the element being visited. + The last argument must have the same type as the tensor's element type, representing the actual value loaded from the input tensor at the given coordinates. - Note that foreach generated loop iterates over the stored elements in the storage - order. However, no matter what storage order is used, the indices passed to the block - always obey the original dimension order. + `sparse_tensor.foreach` can also operate on loop-carried variables and returns + the final values after loop termination. The initial values of the variables are + passed as additional SSA operands to the "sparse_tensor.foreach" following the n + 1 + SSA values mentioned above (n coordinate and 1 value). + + The region must terminate with a "sparse_tensor.yield" that passes the current + values of all loop-carried variables to the next iteration, or to the + result, if at the last iteration. The number and static types of loop-carried + variables may not change with iterations. + + For example: + ```mlir + %c0 = arith.constant 0 : i32 + %ret = sparse_tensor.foreach in %0 init(%c0): tensor, i32 -> i32 do { + ^bb0(%arg1: index, %arg2: index, %arg3: i32, %iter: i32): + %sum = arith.add %iter, %arg3 + sparse_tensor.yield %sum + } + ``` + + It is important to note that foreach generated loop iterates over the stored elements + in the storage order. However, no matter what storage order is used, the indices passed + to the block always obey the original dimension order. 
For example: ```mlir @@ -879,10 +941,10 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", dimLevelType = [ "compressed", "compressed" ], dimOrdering = affine_map<(i,j) -> (j,i)> }> - + // foreach on a column-major sparse tensor sparse_tensor.foreach in %0 : tensor<2x3xf64, #COL_MAJOR> do { - ^bb0(%row: index, %col: index, %arg3: f64): + ^bb0(%row: index, %col: index, %arg3: f64): // [%row, %col] -> [0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1] } @@ -892,30 +954,25 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", // foreach on a row-major sparse tensor sparse_tensor.foreach in %0 : tensor<2x3xf64, #ROW_MAJOR> do { - ^bb0(%row: index, %col: index, %arg3: f64): + ^bb0(%row: index, %col: index, %arg3: f64): // [%row, %col] -> [0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1] } ``` - - Example: - - ```mlir - sparse_tensor.foreach in %0 : tensor do { - ^bb0(%arg1: index, %arg2: index, %arg3: f64): - do something... - } - ``` }]; let builders = [ - OpBuilder<( - ins "Value":$tensor, - "function_ref")> + OpBuilder<(ins "Value":$tensor, + "function_ref")>, + OpBuilder<(ins "Value":$tensor, + "ValueRange":$iterArgs, + "function_ref")> ]; - let regions = (region AnyRegion:$region); - let assemblyFormat = "`in` $tensor attr-dict `:` type($tensor) `do` $region"; + let regions = (region SizedRegion<1>:$region); + let assemblyFormat = "`in` $tensor (`init``(`$initArgs^`)`)? attr-dict" + " `:` type($tensor) (`,` type($initArgs)^)?" + " (`->` type($results)^)? `do` $region"; let hasVerifier = 1; } diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index 2cfdc6d8c6feb..552d2db97435c 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -191,14 +191,17 @@ def Tensor_EmptyOp : Tensor_Op<"empty", let builders = [ // Build with fully static sizes. 
- OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType)>, + OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType, + CArg<"Attribute", "{}">:$encoding)>, // Build with mixed static/dynamic sizes. OpBuilder<(ins "ArrayRef":$staticShape, "Type":$elementType, - "ValueRange":$dynamicSizes)>, + "ValueRange":$dynamicSizes, + CArg<"Attribute", "{}">:$encoding)>, // Build with mixed static/dynamic sizes. - OpBuilder<(ins "ArrayRef":$sizes, "Type":$elementType)> + OpBuilder<(ins "ArrayRef":$sizes, "Type":$elementType, + CArg<"Attribute", "{}">:$encoding)> ]; let hasCanonicalizer = 1; diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index 4b1bb02ee757a..42f8d5cb27698 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -253,6 +253,11 @@ def SplitHandlesOp : TransformDialectOp<"split_handles", let arguments = (ins TransformTypeInterface:$handle, I64Attr:$num_result_handles); let results = (outs Variadic:$results); + + let builders = [ + OpBuilder<(ins "Value":$handle, "int64_t":$numResultHandles)> + ]; + let assemblyFormat = [{ $handle `in` `[` $num_result_handles `]` attr-dict `:` functional-type(operands, results) @@ -305,6 +310,12 @@ def PrintOp : TransformDialectOp<"print", let arguments = (ins Optional:$target, OptionalAttr:$name); let results = (outs); + + let builders = [ + OpBuilder<(ins CArg<"StringRef", "StringRef()">:$name)>, + OpBuilder<(ins "Value":$target, CArg<"StringRef", "StringRef()">:$name)> + ]; + let assemblyFormat = "$target attr-dict (`:` type($target)^)?"; } diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index 5086682ac60ee..6fcfcb1dde2a0 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -48,37 +48,6 @@ bool isColumnMajorMatmul(ArrayAttr 
indexingMaps); /// the reduction. bool isRowMajorBatchMatmul(ArrayAttr indexingMaps); -/// Attribute name for the AffineArrayAttr which encodes the relationship -/// between a structured op iterators' and its operands. -constexpr StringRef getIndexingMapsAttrName() { return "indexing_maps"; } - -/// Attribute name for the StrArrayAttr which encodes the type of a structured -/// op's iterators. -constexpr StringRef getIteratorTypesAttrName() { return "iterator_types"; } - -/// Attribute name for the StrArrayAttr which encodes the distribution type for -/// `linalg.tiled_loop`. -constexpr StringRef getDistributionTypesAttrName() { - return "distribution_types"; -} - -/// Attribute name for the StringAttr which encodes an optional documentation -/// string of the structured op. -constexpr StringRef getDocAttrName() { return "doc"; } - -/// Attribute name for the StrArrayAttr which encodes the external library -/// function that implements the structured op. -constexpr StringRef getLibraryCallAttrName() { return "library_call"; } - -/// Attribute name for the StrArrayAttr which encodes the value of strides. -constexpr StringRef getStridesAttrName() { return "strides"; } - -/// Attribute name for the StrArrayAttr which encodes the value of dilations. -constexpr StringRef getDilationsAttrName() { return "dilations"; } - -/// Attribute name for the StrArrayAttr which encodes the value of paddings. -constexpr StringRef getPaddingAttrName() { return "padding"; } - /// Use to encode that a particular iterator type has parallel semantics. 
constexpr StringRef getParallelIteratorTypeName() { return "parallel"; } diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index b47c5fa32904e..e952284046b54 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -244,7 +244,7 @@ def Vector_ContractionOp : return getOperand(4).getType().cast(); } Type getResultType() { return getResult().getType(); } - ArrayRef getTraitAttrNames(); + SmallVector getTraitAttrNames(); static unsigned getAccOperandIndex() { return 2; } llvm::SmallVector<::mlir::AffineMap, 4> getIndexingMapsArray() { @@ -265,8 +265,6 @@ def Vector_ContractionOp : std::vector> getContractingDimMap(); std::vector> getBatchDimMap(); - static constexpr StringRef getKindAttrStrName() { return "kind"; } - static CombiningKind getDefaultKind() { return CombiningKind::ADD; } diff --git a/mlir/include/mlir/IR/BuiltinAttributes.td b/mlir/include/mlir/IR/BuiltinAttributes.td index 06eb6cb5f0424..70f47323fc85f 100644 --- a/mlir/include/mlir/IR/BuiltinAttributes.td +++ b/mlir/include/mlir/IR/BuiltinAttributes.td @@ -72,7 +72,7 @@ def Builtin_AffineMapAttr : Builtin_Attr<"AffineMap", [ //===----------------------------------------------------------------------===// def Builtin_ArrayAttr : Builtin_Attr<"Array", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "A collection of other Attribute values"; let description = [{ @@ -510,7 +510,7 @@ def Builtin_DenseResourceElementsAttr : Builtin_Attr<"DenseResourceElements", [ //===----------------------------------------------------------------------===// def Builtin_DictionaryAttr : Builtin_Attr<"Dictionary", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "An dictionary of named Attribute values"; let description = [{ @@ -1115,7 +1115,7 @@ def Builtin_StringAttr : Builtin_Attr<"String", [TypedAttrInterface]> { 
//===----------------------------------------------------------------------===// def Builtin_SymbolRefAttr : Builtin_Attr<"SymbolRef", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "An Attribute containing a symbolic reference to an Operation"; let description = [{ @@ -1190,7 +1190,7 @@ def Builtin_SymbolRefAttr : Builtin_Attr<"SymbolRef", [ //===----------------------------------------------------------------------===// def Builtin_TypeAttr : Builtin_Attr<"Type", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "An Attribute containing a Type"; let description = [{ diff --git a/mlir/include/mlir/IR/BuiltinLocationAttributes.td b/mlir/include/mlir/IR/BuiltinLocationAttributes.td index ca96fb9e53bbc..0395e13295904 100644 --- a/mlir/include/mlir/IR/BuiltinLocationAttributes.td +++ b/mlir/include/mlir/IR/BuiltinLocationAttributes.td @@ -29,7 +29,7 @@ class Builtin_LocationAttr traits = []> //===----------------------------------------------------------------------===// def CallSiteLoc : Builtin_LocationAttr<"CallSiteLoc", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "A callsite source location"; let description = [{ @@ -108,7 +108,7 @@ def FileLineColLoc : Builtin_LocationAttr<"FileLineColLoc"> { //===----------------------------------------------------------------------===// def FusedLoc : Builtin_LocationAttr<"FusedLoc", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "A tuple of other source locations"; let description = [{ @@ -149,7 +149,7 @@ def FusedLoc : Builtin_LocationAttr<"FusedLoc", [ //===----------------------------------------------------------------------===// def NameLoc : Builtin_LocationAttr<"NameLoc", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "A named source location"; let description = [{ @@ -188,7 +188,7 @@ def NameLoc : Builtin_LocationAttr<"NameLoc", [ 
//===----------------------------------------------------------------------===// def OpaqueLoc : Builtin_LocationAttr<"OpaqueLoc", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let summary = "An opaque source location"; let description = [{ diff --git a/mlir/include/mlir/IR/Location.h b/mlir/include/mlir/IR/Location.h index 03f6e4e55896e..b772cf4b90e39 100644 --- a/mlir/include/mlir/IR/Location.h +++ b/mlir/include/mlir/IR/Location.h @@ -107,6 +107,9 @@ class Location { return LocationAttr(reinterpret_cast(pointer)); } + /// Support llvm style casting. + static bool classof(Attribute attr) { return llvm::isa(attr); } + protected: /// The internal backing location attribute. LocationAttr impl; @@ -167,6 +170,23 @@ inline OpaqueLoc OpaqueLoc::get(T underlyingLocation, MLIRContext *context) { return get(reinterpret_cast(underlyingLocation), TypeID::get(), UnknownLoc::get(context)); } + +//===----------------------------------------------------------------------===// +// SubElementInterfaces +//===----------------------------------------------------------------------===// + +/// Enable locations to be introspected as sub-elements. +template <> +struct AttrTypeSubElementHandler { + static void walk(Location param, AttrTypeSubElementWalker &walker) { + walker.walk(param); + } + static Location replace(Location param, AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + return cast(attrRepls.take_front(1)[0]); + } +}; + } // namespace mlir //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index 3ce7ff37c8252..24732decc856a 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -894,6 +894,29 @@ struct OperationEquivalence { /// Enable Bitmask enums for OperationEquivalence::Flags. 
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); +//===----------------------------------------------------------------------===// +// OperationFingerPrint +//===----------------------------------------------------------------------===// + +/// A unique fingerprint for a specific operation, and all of its internal +/// operations. +class OperationFingerPrint { +public: + OperationFingerPrint(Operation *topOp); + OperationFingerPrint(const OperationFingerPrint &) = default; + OperationFingerPrint &operator=(const OperationFingerPrint &) = default; + + bool operator==(const OperationFingerPrint &other) const { + return hash == other.hash; + } + bool operator!=(const OperationFingerPrint &other) const { + return !(*this == other); + } + +private: + std::array hash; +}; + } // namespace mlir namespace llvm { diff --git a/mlir/include/mlir/IR/StorageUniquerSupport.h b/mlir/include/mlir/IR/StorageUniquerSupport.h index 074764caf33b1..ff5a0630e4fff 100644 --- a/mlir/include/mlir/IR/StorageUniquerSupport.h +++ b/mlir/include/mlir/IR/StorageUniquerSupport.h @@ -180,6 +180,9 @@ class StorageUserBase : public BaseT, public Traits... { return ConcreteT((const typename BaseT::ImplType *)ptr); } + /// Utility for easy access to the storage instance. + ImplType *getImpl() const { return static_cast(this->impl); } + protected: /// Mutate the current storage instance. This will not change the unique key. /// The arguments are forwarded to 'ConcreteT::mutate'. @@ -199,9 +202,6 @@ class StorageUserBase : public BaseT, public Traits... { return success(); } - /// Utility for easy access to the storage instance. - ImplType *getImpl() const { return static_cast(this->impl); } - private: /// Trait to check if T provides a 'ConcreteEntity' type alias.
template diff --git a/mlir/include/mlir/IR/SubElementInterfaces.h b/mlir/include/mlir/IR/SubElementInterfaces.h index 2c40e4edfa0fa..2af7642e93b25 100644 --- a/mlir/include/mlir/IR/SubElementInterfaces.h +++ b/mlir/include/mlir/IR/SubElementInterfaces.h @@ -23,6 +23,272 @@ template using SubElementReplFn = function_ref; template using SubElementResultReplFn = function_ref(T)>; + +//===----------------------------------------------------------------------===// +/// AttrTypeSubElementHandler +//===----------------------------------------------------------------------===// + +/// This class is used by AttrTypeSubElementHandler instances to walking sub +/// attributes and types. +class AttrTypeSubElementWalker { +public: + AttrTypeSubElementWalker(function_ref walkAttrsFn, + function_ref walkTypesFn) + : walkAttrsFn(walkAttrsFn), walkTypesFn(walkTypesFn) {} + + /// Walk an attribute. + void walk(Attribute element) { + if (element) + walkAttrsFn(element); + } + /// Walk a type. + void walk(Type element) { + if (element) + walkTypesFn(element); + } + /// Walk a range of attributes or types. + template + void walkRange(RangeT &&elements) { + for (auto element : elements) + walk(element); + } + +private: + function_ref walkAttrsFn; + function_ref walkTypesFn; +}; + +/// This class is used by AttrTypeSubElementHandler instances to process sub +/// element replacements. +template +class AttrTypeSubElementReplacements { +public: + AttrTypeSubElementReplacements(ArrayRef repls) : repls(repls) {} + + /// Take the first N replacements as an ArrayRef, dropping them from + /// this replacement list. + ArrayRef take_front(unsigned n) { + ArrayRef elements = repls.take_front(n); + repls = repls.drop_front(n); + return elements; + } + +private: + /// The current set of replacements. 
+ ArrayRef repls; +}; +using AttrSubElementReplacements = AttrTypeSubElementReplacements; +using TypeSubElementReplacements = AttrTypeSubElementReplacements; + +/// This class provides support for interacting with the +/// SubElementInterfaces for different types of parameters. An +/// implementation of this class should be provided for any parameter class +/// that may contain an attribute or type. There are two main methods of +/// this class that need to be implemented: +/// +/// - walk +/// +/// This method should traverse into any sub elements of the parameter +/// using the provided walker, or by invoking handlers for sub-types. +/// +/// - replace +/// +/// This method should extract any necessary sub elements using the +/// provided replacer, or by invoking handlers for sub-types. The new +/// post-replacement parameter value should be returned. +/// +template +struct AttrTypeSubElementHandler { + /// Default walk implementation that does nothing. + static inline void walk(const T &param, AttrTypeSubElementWalker &walker) {} + + /// Default replace implementation just forwards the parameter. + template + static inline decltype(auto) replace(ParamT &&param, + AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + return std::forward(param); + } + + /// Tag indicating that this handler does not support sub-elements. + using DefaultHandlerTag = void; +}; + +/// Detect if any of the given parameter types has a sub-element handler. +namespace detail { +template +using has_default_sub_element_handler_t = decltype(T::DefaultHandlerTag); +} // namespace detail +template +inline constexpr bool has_sub_attr_or_type_v = + (!llvm::is_detected::value || + ...); + +/// Implementation for derived Attributes and Types.
+template +struct AttrTypeSubElementHandler< + T, std::enable_if_t || + std::is_base_of_v>> { + static void walk(T param, AttrTypeSubElementWalker &walker) { + walker.walk(param); + } + static T replace(T param, AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + if (!param) + return T(); + if constexpr (std::is_base_of_v) { + return cast(attrRepls.take_front(1)[0]); + } else { + return cast(typeRepls.take_front(1)[0]); + } + } +}; +template <> +struct AttrTypeSubElementHandler { + template + static void walk(T param, AttrTypeSubElementWalker &walker) { + walker.walk(param.getName()); + walker.walk(param.getValue()); + } + template + static T replace(T param, AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + ArrayRef paramRepls = attrRepls.take_front(2); + return T(cast(paramRepls[0]), paramRepls[1]); + } +}; +/// Implementation for derived ArrayRef. +template +struct AttrTypeSubElementHandler, + std::enable_if_t>> { + using EltHandler = AttrTypeSubElementHandler; + + static void walk(ArrayRef param, AttrTypeSubElementWalker &walker) { + for (const T &subElement : param) + EltHandler::walk(subElement, walker); + } + static auto replace(ArrayRef param, AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + // Normal attributes/types can extract using the replacer directly. + if constexpr (std::is_base_of_v && + sizeof(T) == sizeof(Attribute)) { + ArrayRef attrs = attrRepls.take_front(param.size()); + return ArrayRef((const T *)attrs.data(), attrs.size()); + } else if constexpr (std::is_base_of_v && + sizeof(T) == sizeof(Type)) { + ArrayRef types = typeRepls.take_front(param.size()); + return ArrayRef((const T *)types.data(), types.size()); + } else { + // Otherwise, we need to allocate storage for the new elements. 
+ SmallVector newElements; + for (const T &element : param) + newElements.emplace_back( + EltHandler::replace(element, attrRepls, typeRepls)); + return newElements; + } + } +}; +/// Implementation for Tuple. +template +struct AttrTypeSubElementHandler< + std::tuple, std::enable_if_t>> { + static void walk(const std::tuple &param, + AttrTypeSubElementWalker &walker) { + std::apply( + [&](const Ts &...params) { + (AttrTypeSubElementHandler::walk(params, walker), ...); + }, + param); + } + static auto replace(const std::tuple &param, + AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + return std::apply( + [&](const Ts &...params) + -> std::tuple::replace( + params, attrRepls, typeRepls))...> { + return {AttrTypeSubElementHandler::replace(params, attrRepls, + typeRepls)...}; + }, + param); + } +}; + +namespace detail { +template +struct is_tuple : public std::false_type {}; +template +struct is_tuple> : public std::true_type {}; +template +using has_get_method = decltype(T::get(std::declval()...)); + +/// This function provides the underlying implementation for the +/// SubElementInterface walk method, using the key type of the derived +/// attribute/type to interact with the individual parameters. +template +void walkImmediateSubElementsImpl(T derived, + function_ref walkAttrsFn, + function_ref walkTypesFn) { + auto key = static_cast(derived.getImpl())->getAsKey(); + + // If we don't have any sub-elements, there is nothing to do. + if constexpr (!has_sub_attr_or_type_v) { + return; + } else { + AttrTypeSubElementWalker walker(walkAttrsFn, walkTypesFn); + AttrTypeSubElementHandler::walk(key, walker); + } +} + +/// This function invokes the proper `get` method for a type `T` with the given +/// values. +template +T constructSubElementReplacement(MLIRContext *ctx, Ts &&...params) { + // Prefer a direct `get` method if one exists.
+ if constexpr (llvm::is_detected::value) { + (void)ctx; + return T::get(std::forward(params)...); + } else if constexpr (llvm::is_detected::value) { + return T::get(ctx, std::forward(params)...); + } else { + // Otherwise, pass to the base get. + return T::Base::get(ctx, std::forward(params)...); + } +} + +/// This function provides the underlying implementation for the +/// SubElementInterface replace method, using the key type of the derived +/// attribute/type to interact with the individual parameters. +template +T replaceImmediateSubElementsImpl(T derived, ArrayRef &replAttrs, + ArrayRef &replTypes) { + auto key = static_cast(derived.getImpl())->getAsKey(); + + // If we don't have any sub-elements, we can just return the original. + if constexpr (!has_sub_attr_or_type_v) { + return derived; + + // Otherwise, we need to replace any necessary sub-elements. + } else { + AttrSubElementReplacements attrRepls(replAttrs); + TypeSubElementReplacements typeRepls(replTypes); + auto newKey = AttrTypeSubElementHandler::replace( + key, attrRepls, typeRepls); + if constexpr (is_tuple::value) { + return std::apply( + [&](auto &&...params) { + return constructSubElementReplacement( + derived.getContext(), + std::forward(params)...); + }, + newKey); + } else { + return constructSubElementReplacement(derived.getContext(), newKey); + } + } +} +} // namespace detail } // namespace mlir /// Include the definitions of the sub elemnt interfaces. 
diff --git a/mlir/include/mlir/IR/SubElementInterfaces.td b/mlir/include/mlir/IR/SubElementInterfaces.td index 3718b38238c23..abb5afcc93aa1 100644 --- a/mlir/include/mlir/IR/SubElementInterfaces.td +++ b/mlir/include/mlir/IR/SubElementInterfaces.td @@ -32,7 +32,11 @@ class SubElementInterfaceBase":$walkAttrsFn, - "llvm::function_ref":$walkTypesFn) + "llvm::function_ref":$walkTypesFn), + /*methodBody=*/[{}], /*defaultImplementation=*/[{ + ::mlir::detail::walkImmediateSubElementsImpl( + }] # derivedValue # [{, walkAttrsFn, walkTypesFn); + }] >, InterfaceMethod< /*desc=*/[{ @@ -47,10 +51,13 @@ class SubElementInterfaceBase":$replAttrs, - "::llvm::ArrayRef<::mlir::Type>":$replTypes - )>, + }], attrOrType, "replaceImmediateSubElements", + (ins "::llvm::ArrayRef<::mlir::Attribute>":$replAttrs, + "::llvm::ArrayRef<::mlir::Type>":$replTypes), + /*methodBody=*/[{}], /*defaultImplementation=*/[{ + return ::mlir::detail::replaceImmediateSubElementsImpl( + }] # derivedValue # [{, replAttrs, replTypes); + }]>, ]; code extraClassDeclaration = [{ @@ -154,6 +161,9 @@ def SubElementAttrInterface let description = [{ An interface used to query and manipulate sub-elements, such as sub-types and sub-attributes of a composite attribute. + + To support the introspection of custom parameters that hold sub-elements, + a specialization of the `AttrTypeSubElementHandler` class must be provided. }]; } @@ -168,6 +178,9 @@ def SubElementTypeInterface let description = [{ An interface used to query and manipulate sub-elements, such as sub-types and sub-attributes of a composite type. + + To support the introspection of custom parameters that hold sub-elements, + a specialization of the `AttrTypeSubElementHandler` class must be provided. 
}]; } diff --git a/mlir/include/mlir/IR/SymbolTable.h b/mlir/include/mlir/IR/SymbolTable.h index 3cc48000e1944..a24693575f033 100644 --- a/mlir/include/mlir/IR/SymbolTable.h +++ b/mlir/include/mlir/IR/SymbolTable.h @@ -249,7 +249,7 @@ class SymbolTableCollection { Operation *lookupSymbolIn(Operation *symbolTableOp, StringAttr symbol); Operation *lookupSymbolIn(Operation *symbolTableOp, SymbolRefAttr name); template - T lookupSymbolIn(Operation *symbolTableOp, NameT &&name) const { + T lookupSymbolIn(Operation *symbolTableOp, NameT &&name) { return dyn_cast_or_null( lookupSymbolIn(symbolTableOp, std::forward(name))); } diff --git a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h index 5bbab1f994ece..7f65707e98726 100644 --- a/mlir/include/mlir/IR/TypeRange.h +++ b/mlir/include/mlir/IR/TypeRange.h @@ -165,6 +165,23 @@ inline bool operator==(ArrayRef lhs, const ValueTypeRange &rhs) { std::equal(lhs.begin(), lhs.end(), rhs.begin()); } +//===----------------------------------------------------------------------===// +// SubElementInterfaces +//===----------------------------------------------------------------------===// + +/// Enable TypeRange to be introspected for sub-elements. 
+template <> +struct AttrTypeSubElementHandler { + static void walk(TypeRange param, AttrTypeSubElementWalker &walker) { + walker.walkRange(param); + } + static TypeRange replace(TypeRange param, + AttrSubElementReplacements &attrRepls, + TypeSubElementReplacements &typeRepls) { + return typeRepls.take_front(param.size()); + } +}; + } // namespace mlir namespace llvm { diff --git a/mlir/include/mlir/Interfaces/TilingInterface.td b/mlir/include/mlir/Interfaces/TilingInterface.td index 0cdf7a8eb649a..dc6ffcbb7accc 100644 --- a/mlir/include/mlir/Interfaces/TilingInterface.td +++ b/mlir/include/mlir/Interfaces/TilingInterface.td @@ -155,4 +155,72 @@ def TilingInterface : OpInterface<"TilingInterface"> { > ]; } + +def PartialReductionOpInterface : OpInterface<"PartialReductionOpInterface"> { + let description = [{ + Interface for allowing operations to expose information needed to + tile reductions using partial reduction followed by merge. This is + complementary to TilingInterface to tile reductions. + }]; + let cppNamespace = "::mlir"; + let methods = [ + InterfaceMethod< + /*desc=*/[{ + Method to generate a tensor initialized with the identity value of the + operation reduction. The tensor shape is equal to operation result + shape with new dimension for each non zero tile size. + }], + /*retType=*/"FailureOr", + /*methodName=*/"generateInitialTensorForPartialReduction", + /*args=*/(ins + "OpBuilder &":$b, + "Location ":$loc, + "ArrayRef":$sizes, + "ArrayRef":$reductionDim), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return failure(); + }] + >, + InterfaceMethod< + /*desc=*/[{ + Method to generate a tiled version of the operation where the tiled + reduction dimension are converted to parallel dimensions with a size + less or equal to the tile size. This is meant to be used with + `mergeReductions` method which will combine the partial reductions.
+ }], + /*retType=*/"Operation*", + /*methodName=*/"tileToPartialReduction", + /*args=*/(ins + "OpBuilder &":$b, + "Location ":$loc, + "ValueRange":$init, + "ArrayRef":$offsets, + "ArrayRef":$sizes, + "ArrayRef":$reductionDims), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return nullptr; + }] + >, + InterfaceMethod< + /*desc=*/[{ + Method to merge partial reductions for an operation that has been + tiled along the reduction dimensions. This will only apply the + reduction the operation. + }], + /*retType=*/"Operation*", + /*methodName=*/"mergeReductions", + /*args=*/(ins + "OpBuilder &":$b, + "Location ":$loc, + "ValueRange":$partialReduce, + "ArrayRef":$reductionDim), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return nullptr; + }] + > + ]; +} #endif // MLIR_TILINGINTERFACE diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h index ec858a7415ab1..058e2d738b388 100644 --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -75,6 +75,7 @@ class OpPassManager { OpPassManager(const OpPassManager &rhs); ~OpPassManager(); OpPassManager &operator=(const OpPassManager &rhs); + OpPassManager &operator=(OpPassManager &&rhs); /// Iterator over the passes in this pass manager. using pass_iterator = diff --git a/mlir/include/mlir/Pass/PassRegistry.h b/mlir/include/mlir/Pass/PassRegistry.h index 4f261e533ad15..97692262acc8e 100644 --- a/mlir/include/mlir/Pass/PassRegistry.h +++ b/mlir/include/mlir/Pass/PassRegistry.h @@ -231,7 +231,8 @@ struct PassPipelineCLParserImpl; /// options for each of the passes and pipelines that have been registered with /// the pass registry; Meaning that `-cse` will refer to the CSE pass in MLIR. /// It also registers an argument, `pass-pipeline`, that supports parsing a -/// textual description of a pipeline. +/// textual description of a pipeline. This option is mutually exclusive with +/// the individual pass options. 
class PassPipelineCLParser { public: /// Construct a pass pipeline parser with the given command line description. @@ -254,6 +255,8 @@ class PassPipelineCLParser { private: std::unique_ptr impl; + + llvm::cl::opt passPipeline; }; /// This class implements a command-line parser specifically for MLIR pass diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 061edb196f0fc..6045b2237976e 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -507,6 +507,9 @@ void populateFunctionOpInterfaceTypeConversionPattern( patterns, converter); } +void populateAnyFunctionOpInterfaceTypeConversionPattern( + RewritePatternSet &patterns, TypeConverter &converter); + //===----------------------------------------------------------------------===// // Conversion PatternRewriter //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp index e343c47bb0f01..4684ccfa21b8a 100644 --- a/mlir/lib/Analysis/SliceAnalysis.cpp +++ b/mlir/lib/Analysis/SliceAnalysis.cpp @@ -98,10 +98,11 @@ static void getBackwardSliceImpl(Operation *op, // TODO: determine whether we want to recurse backward into the other // blocks of parentOp, which are not technically backward unless they flow // into us. For now, just bail. 
- assert(parentOp->getNumRegions() == 1 && - parentOp->getRegion(0).getBlocks().size() == 1); - if (backwardSlice->count(parentOp) == 0) + if (parentOp && backwardSlice->count(parentOp) == 0) { + assert(parentOp->getNumRegions() == 1 && + parentOp->getRegion(0).getBlocks().size() == 1); getBackwardSliceImpl(parentOp, backwardSlice, filter); + } } else { llvm_unreachable("No definingOp and not a block argument."); } diff --git a/mlir/lib/Bindings/Python/Pass.cpp b/mlir/lib/Bindings/Python/Pass.cpp index 99d67582d1780..cb3c1586eb996 100644 --- a/mlir/lib/Bindings/Python/Pass.cpp +++ b/mlir/lib/Bindings/Python/Pass.cpp @@ -56,11 +56,14 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) { // Mapping of the top-level PassManager //---------------------------------------------------------------------------- py::class_(m, "PassManager", py::module_local()) - .def(py::init<>([](DefaultingPyMlirContext context) { - MlirPassManager passManager = - mlirPassManagerCreate(context->get()); + .def(py::init<>([](const std::string &anchorOp, + DefaultingPyMlirContext context) { + MlirPassManager passManager = mlirPassManagerCreateOnOperation( + context->get(), + mlirStringRefCreate(anchorOp.data(), anchorOp.size())); return new PyPassManager(passManager); }), + py::arg("anchor_op") = py::str("any"), py::arg("context") = py::none(), "Create a new PassManager for the current (or provided) Context.") .def_property_readonly(MLIR_PYTHON_CAPI_PTR_ATTR, @@ -85,7 +88,7 @@ void mlir::python::populatePassManagerSubmodule(py::module &m) { [](const std::string &pipeline, DefaultingPyMlirContext context) { MlirPassManager passManager = mlirPassManagerCreate(context->get()); PyPrintAccumulator errorMsg; - MlirLogicalResult status = mlirOpPassManagerAddPipeline( + MlirLogicalResult status = mlirParsePassPipeline( mlirPassManagerGetAsOpPassManager(passManager), mlirStringRefCreate(pipeline.data(), pipeline.size()), errorMsg.getCallback(), errorMsg.getUserData()); @@ -97,6 +100,20 @@ 
void mlir::python::populatePassManagerSubmodule(py::module &m) { "Parse a textual pass-pipeline and return a top-level PassManager " "that can be applied on a Module. Throw a ValueError if the pipeline " "can't be parsed") + .def( + "add", + [](PyPassManager &passManager, const std::string &pipeline) { + PyPrintAccumulator errorMsg; + MlirLogicalResult status = mlirOpPassManagerAddPipeline( + mlirPassManagerGetAsOpPassManager(passManager.get()), + mlirStringRefCreate(pipeline.data(), pipeline.size()), + errorMsg.getCallback(), errorMsg.getUserData()); + if (mlirLogicalResultIsFailure(status)) + throw SetPyError(PyExc_ValueError, std::string(errorMsg.join())); + }, + py::arg("pipeline"), + "Add textual pipeline elements to the pass manager. Throws a " + "ValueError if the pipeline can't be parsed.") .def( "run", [](PyPassManager &passManager, PyModule &module) { diff --git a/mlir/lib/CAPI/IR/Pass.cpp b/mlir/lib/CAPI/IR/Pass.cpp index 30f5804876940..4afc668592bd8 100644 --- a/mlir/lib/CAPI/IR/Pass.cpp +++ b/mlir/lib/CAPI/IR/Pass.cpp @@ -86,10 +86,14 @@ void mlirPrintPassPipeline(MlirOpPassManager passManager, } MlirLogicalResult mlirParsePassPipeline(MlirOpPassManager passManager, - MlirStringRef pipeline) { - // TODO: errors are sent to std::errs() at the moment, we should pass in a - // stream and redirect to a diagnostic. 
- return wrap(mlir::parsePassPipeline(unwrap(pipeline), *unwrap(passManager))); + MlirStringRef pipeline, + MlirStringCallback callback, + void *userData) { + detail::CallbackOstream stream(callback, userData); + FailureOr pm = parsePassPipeline(unwrap(pipeline), stream); + if (succeeded(pm)) + *unwrap(passManager) = std::move(*pm); + return wrap(pm); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp b/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp new file mode 100644 index 0000000000000..8c5d76f9f2d72 --- /dev/null +++ b/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp @@ -0,0 +1,38 @@ +//===- AttrToLLVMConverter.cpp - Arith attributes conversion to LLVM ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" + +using namespace mlir; + +// Map arithmetic fastmath enum values to LLVMIR enum values. 
+LLVM::FastmathFlags +mlir::arith::convertArithFastMathFlagsToLLVM(arith::FastMathFlags arithFMF) { + LLVM::FastmathFlags llvmFMF{}; + const std::pair flags[] = { + {arith::FastMathFlags::nnan, LLVM::FastmathFlags::nnan}, + {arith::FastMathFlags::ninf, LLVM::FastmathFlags::ninf}, + {arith::FastMathFlags::nsz, LLVM::FastmathFlags::nsz}, + {arith::FastMathFlags::arcp, LLVM::FastmathFlags::arcp}, + {arith::FastMathFlags::contract, LLVM::FastmathFlags::contract}, + {arith::FastMathFlags::afn, LLVM::FastmathFlags::afn}, + {arith::FastMathFlags::reassoc, LLVM::FastmathFlags::reassoc}}; + for (auto fmfMap : flags) { + if (bitEnumContainsAny(arithFMF, fmfMap.first)) + llvmFMF = llvmFMF | fmfMap.second; + } + return llvmFMF; +} + +// Create an LLVM fastmath attribute from a given arithmetic fastmath attribute. +LLVM::FastmathFlagsAttr +mlir::arith::convertArithFastMathAttrToLLVM(arith::FastMathFlagsAttr fmfAttr) { + arith::FastMathFlags arithFMF = fmfAttr.getValue(); + return LLVM::FastmathFlagsAttr::get( + fmfAttr.getContext(), convertArithFastMathFlagsToLLVM(arithFMF)); +} diff --git a/mlir/lib/Conversion/ArithCommon/CMakeLists.txt b/mlir/lib/Conversion/ArithCommon/CMakeLists.txt new file mode 100644 index 0000000000000..888c45f2e52fe --- /dev/null +++ b/mlir/lib/Conversion/ArithCommon/CMakeLists.txt @@ -0,0 +1,10 @@ +add_mlir_conversion_library(MLIRArithAttrToLLVMConversion + AttrToLLVMConverter.cpp + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRArithDialect + MLIRLLVMDialect + ) diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp index f2814b56d4d34..3ad01556b2f69 100644 --- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp +++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp @@ -8,6 +8,7 @@ #include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h" +#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include 
"mlir/Conversion/LLVMCommon/VectorPattern.h" #include "mlir/Dialect/Arith/IR/Arith.h" @@ -24,93 +25,20 @@ using namespace mlir; namespace { -// Map arithmetic fastmath enum values to LLVMIR enum values. -static LLVM::FastmathFlags -convertArithFastMathFlagsToLLVM(arith::FastMathFlags arithFMF) { - LLVM::FastmathFlags llvmFMF{}; - const std::pair flags[] = { - {arith::FastMathFlags::nnan, LLVM::FastmathFlags::nnan}, - {arith::FastMathFlags::ninf, LLVM::FastmathFlags::ninf}, - {arith::FastMathFlags::nsz, LLVM::FastmathFlags::nsz}, - {arith::FastMathFlags::arcp, LLVM::FastmathFlags::arcp}, - {arith::FastMathFlags::contract, LLVM::FastmathFlags::contract}, - {arith::FastMathFlags::afn, LLVM::FastmathFlags::afn}, - {arith::FastMathFlags::reassoc, LLVM::FastmathFlags::reassoc}}; - for (auto fmfMap : flags) { - if (bitEnumContainsAny(arithFMF, fmfMap.first)) - llvmFMF = llvmFMF | fmfMap.second; - } - return llvmFMF; -} - -// Create an LLVM fastmath attribute from a given arithmetic fastmath attribute. -static LLVM::FastmathFlagsAttr -convertArithFastMathAttr(arith::FastMathFlagsAttr fmfAttr) { - arith::FastMathFlags arithFMF = fmfAttr.getValue(); - return LLVM::FastmathFlagsAttr::get( - fmfAttr.getContext(), convertArithFastMathFlagsToLLVM(arithFMF)); -} - -// Attribute converter that populates a NamedAttrList by removing the fastmath -// attribute from the source operation attributes, and replacing it with an -// equivalent LLVM fastmath attribute. -template -class AttrConvertFastMath { -public: - AttrConvertFastMath(SourceOp srcOp) { - // Copy the source attributes. - convertedAttr = NamedAttrList{srcOp->getAttrs()}; - // Get the name of the arith fastmath attribute. - llvm::StringRef arithFMFAttrName = SourceOp::getFastMathAttrName(); - // Remove the source fastmath attribute. 
- auto arithFMFAttr = convertedAttr.erase(arithFMFAttrName) - .template dyn_cast_or_null(); - if (arithFMFAttr) { - llvm::StringRef targetAttrName = TargetOp::getFastmathAttrName(); - convertedAttr.set(targetAttrName, convertArithFastMathAttr(arithFMFAttr)); - } - } - - ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } - -private: - NamedAttrList convertedAttr; -}; - -// Attribute converter that populates a NamedAttrList by removing the fastmath -// attribute from the source operation attributes. This may be useful for -// target operations that do not require the fastmath attribute, or for targets -// that do not yet support the LLVM fastmath attribute. -template -class AttrDropFastMath { -public: - AttrDropFastMath(SourceOp srcOp) { - // Copy the source attributes. - convertedAttr = NamedAttrList{srcOp->getAttrs()}; - // Get the name of the arith fastmath attribute. - llvm::StringRef arithFMFAttrName = SourceOp::getFastMathAttrName(); - // Remove the source fastmath attribute. 
- convertedAttr.erase(arithFMFAttrName); - } - - ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } - -private: - NamedAttrList convertedAttr; -}; - //===----------------------------------------------------------------------===// // Straightforward Op Lowerings //===----------------------------------------------------------------------===// -using AddFOpLowering = VectorConvertToLLVMPattern; +using AddFOpLowering = + VectorConvertToLLVMPattern; using AddIOpLowering = VectorConvertToLLVMPattern; using AndIOpLowering = VectorConvertToLLVMPattern; using BitcastOpLowering = VectorConvertToLLVMPattern; -using DivFOpLowering = VectorConvertToLLVMPattern; +using DivFOpLowering = + VectorConvertToLLVMPattern; using DivSIOpLowering = VectorConvertToLLVMPattern; using DivUIOpLowering = @@ -124,29 +52,31 @@ using FPToSIOpLowering = VectorConvertToLLVMPattern; using FPToUIOpLowering = VectorConvertToLLVMPattern; -// TODO: Add LLVM intrinsic support for fastmath using MaxFOpLowering = - VectorConvertToLLVMPattern; + VectorConvertToLLVMPattern; using MaxSIOpLowering = VectorConvertToLLVMPattern; using MaxUIOpLowering = VectorConvertToLLVMPattern; -// TODO: Add LLVM intrinsic support for fastmath using MinFOpLowering = - VectorConvertToLLVMPattern; + VectorConvertToLLVMPattern; using MinSIOpLowering = VectorConvertToLLVMPattern; using MinUIOpLowering = VectorConvertToLLVMPattern; -using MulFOpLowering = VectorConvertToLLVMPattern; +using MulFOpLowering = + VectorConvertToLLVMPattern; using MulIOpLowering = VectorConvertToLLVMPattern; -using NegFOpLowering = VectorConvertToLLVMPattern; +using NegFOpLowering = + VectorConvertToLLVMPattern; using OrIOpLowering = VectorConvertToLLVMPattern; -// TODO: Add LLVM intrinsic support for fastmath using RemFOpLowering = - VectorConvertToLLVMPattern; + VectorConvertToLLVMPattern; using RemSIOpLowering = VectorConvertToLLVMPattern; using RemUIOpLowering = @@ -160,8 +90,9 @@ using ShRUIOpLowering = VectorConvertToLLVMPattern; using 
SIToFPOpLowering = VectorConvertToLLVMPattern; -using SubFOpLowering = VectorConvertToLLVMPattern; +using SubFOpLowering = + VectorConvertToLLVMPattern; using SubIOpLowering = VectorConvertToLLVMPattern; using TruncFOpLowering = VectorConvertToLLVMPattern; diff --git a/mlir/lib/Conversion/ArithToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ArithToLLVM/CMakeLists.txt index 45ee8708aa155..bb1fa2fbb6577 100644 --- a/mlir/lib/Conversion/ArithToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/ArithToLLVM/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRArithToLLVM Core LINK_LIBS PUBLIC + MLIRArithAttrToLLVMConversion MLIRArithDialect MLIRLLVMCommonConversion MLIRLLVMDialect diff --git a/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp b/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp index 2452928dd4503..cf65beb924fb7 100644 --- a/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp +++ b/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp @@ -1031,7 +1031,7 @@ struct ConvertArithToSPIRVPass auto target = SPIRVConversionTarget::get(targetAttr); SPIRVConversionOptions options; - options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes; + options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; options.enableFastMathMode = this->enableFastMath; SPIRVTypeConverter typeConverter(targetAttr, options); diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt index a65814d36b5b4..62dae19a31344 100644 --- a/mlir/lib/Conversion/CMakeLists.txt +++ b/mlir/lib/Conversion/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(AffineToStandard) add_subdirectory(AMDGPUToROCDL) +add_subdirectory(ArithCommon) add_subdirectory(ArithToLLVM) add_subdirectory(ArithToSPIRV) add_subdirectory(ArmNeon2dToIntr) diff --git a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp index 0d1e8b8079465..d8aecae257b46 100644 --- 
a/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp +++ b/mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp @@ -41,7 +41,7 @@ void ConvertControlFlowToSPIRVPass::runOnOperation() { SPIRVConversionTarget::get(targetAttr); SPIRVConversionOptions options; - options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes; + options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; SPIRVTypeConverter typeConverter(targetAttr, options); RewritePatternSet patterns(context); diff --git a/mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp b/mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp index a82ba5dd12a5d..9fffc5e3182e9 100644 --- a/mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp +++ b/mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp @@ -40,7 +40,7 @@ void ConvertFuncToSPIRVPass::runOnOperation() { SPIRVConversionTarget::get(targetAttr); SPIRVConversionOptions options; - options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes; + options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; SPIRVTypeConverter typeConverter(targetAttr, options); RewritePatternSet patterns(context); diff --git a/mlir/lib/Conversion/IndexToLLVM/IndexToLLVM.cpp b/mlir/lib/Conversion/IndexToLLVM/IndexToLLVM.cpp index 844c57a74a198..4461d5121ef01 100644 --- a/mlir/lib/Conversion/IndexToLLVM/IndexToLLVM.cpp +++ b/mlir/lib/Conversion/IndexToLLVM/IndexToLLVM.cpp @@ -268,6 +268,11 @@ using ConvertIndexMaxS = mlir::OneToOneConvertToLLVMPattern; using ConvertIndexMaxU = mlir::OneToOneConvertToLLVMPattern; +using ConvertIndexShl = mlir::OneToOneConvertToLLVMPattern; +using ConvertIndexShrS = + mlir::OneToOneConvertToLLVMPattern; +using ConvertIndexShrU = + mlir::OneToOneConvertToLLVMPattern; using ConvertIndexBoolConstant = mlir::OneToOneConvertToLLVMPattern; @@ -290,6 +295,9 @@ void index::populateIndexToLLVMConversionPatterns( ConvertIndexRemU, ConvertIndexMaxS, ConvertIndexMaxU, + ConvertIndexShl, + ConvertIndexShrS, + 
ConvertIndexShrU, ConvertIndexCeilDivS, ConvertIndexCeilDivU, ConvertIndexFloorDivS, diff --git a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp index a65ac51c31c63..4f72cd1081f0e 100644 --- a/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp +++ b/mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp @@ -43,6 +43,12 @@ MemRefDescriptor MemRefDescriptor::fromStaticShape(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, MemRefType type, Value memory) { + return fromStaticShape(builder, loc, typeConverter, type, memory, memory); +} + +MemRefDescriptor MemRefDescriptor::fromStaticShape( + OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, + MemRefType type, Value memory, Value alignedMemory) { assert(type.hasStaticShape() && "unexpected dynamic shape"); // Extract all strides and offsets and verify they are static. @@ -61,7 +67,7 @@ MemRefDescriptor::fromStaticShape(OpBuilder &builder, Location loc, auto descr = MemRefDescriptor::undef(builder, loc, convertedType); descr.setAllocatedPtr(builder, loc, memory); - descr.setAlignedPtr(builder, loc, memory); + descr.setAlignedPtr(builder, loc, alignedMemory); descr.setConstantOffset(builder, loc, offset); // Fill in sizes and strides diff --git a/mlir/lib/Conversion/MathToLLVM/CMakeLists.txt b/mlir/lib/Conversion/MathToLLVM/CMakeLists.txt index a6e6b4f56d37e..97393fc849691 100644 --- a/mlir/lib/Conversion/MathToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/MathToLLVM/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRMathToLLVM Core LINK_LIBS PUBLIC + MLIRArithAttrToLLVMConversion MLIRLLVMCommonConversion MLIRLLVMDialect MLIRMathDialect diff --git a/mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp b/mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp index b67a86f443b5c..b5ce019b20832 100644 --- a/mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp +++ b/mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp @@ -8,6 +8,7 @@ #include 
"mlir/Conversion/MathToLLVM/MathToLLVM.h" +#include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Conversion/LLVMCommon/VectorPattern.h" @@ -24,31 +25,39 @@ namespace mlir { using namespace mlir; namespace { -using AbsFOpLowering = VectorConvertToLLVMPattern; -using CeilOpLowering = VectorConvertToLLVMPattern; + +template +using ConvertFastMath = arith::AttrConvertFastMathToLLVM; + +template +using ConvertFMFMathToLLVMPattern = + VectorConvertToLLVMPattern; + +using AbsFOpLowering = ConvertFMFMathToLLVMPattern; +using CeilOpLowering = ConvertFMFMathToLLVMPattern; using CopySignOpLowering = - VectorConvertToLLVMPattern; -using CosOpLowering = VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; +using CosOpLowering = ConvertFMFMathToLLVMPattern; using CtPopFOpLowering = VectorConvertToLLVMPattern; -using Exp2OpLowering = VectorConvertToLLVMPattern; -using ExpOpLowering = VectorConvertToLLVMPattern; +using Exp2OpLowering = ConvertFMFMathToLLVMPattern; +using ExpOpLowering = ConvertFMFMathToLLVMPattern; using FloorOpLowering = - VectorConvertToLLVMPattern; -using FmaOpLowering = VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; +using FmaOpLowering = ConvertFMFMathToLLVMPattern; using Log10OpLowering = - VectorConvertToLLVMPattern; -using Log2OpLowering = VectorConvertToLLVMPattern; -using LogOpLowering = VectorConvertToLLVMPattern; -using PowFOpLowering = VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; +using Log2OpLowering = ConvertFMFMathToLLVMPattern; +using LogOpLowering = ConvertFMFMathToLLVMPattern; +using PowFOpLowering = ConvertFMFMathToLLVMPattern; using RoundEvenOpLowering = - VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; using RoundOpLowering = - VectorConvertToLLVMPattern; -using SinOpLowering = VectorConvertToLLVMPattern; -using SqrtOpLowering = VectorConvertToLLVMPattern; + 
ConvertFMFMathToLLVMPattern; +using SinOpLowering = ConvertFMFMathToLLVMPattern; +using SqrtOpLowering = ConvertFMFMathToLLVMPattern; using FTruncOpLowering = - VectorConvertToLLVMPattern; + ConvertFMFMathToLLVMPattern; // A `CtLz/CtTz/absi(a)` is converted into `CtLz/CtTz/absi(a, false)`. template @@ -113,6 +122,8 @@ struct ExpM1OpLowering : public ConvertOpToLLVMPattern { auto resultType = op.getResult().getType(); auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); + ConvertFastMath expAttrs(op); + ConvertFastMath subAttrs(op); if (!operandType.isa()) { LLVM::ConstantOp one; @@ -123,8 +134,10 @@ struct ExpM1OpLowering : public ConvertOpToLLVMPattern { } else { one = rewriter.create(loc, operandType, floatOne); } - auto exp = rewriter.create(loc, adaptor.getOperand()); - rewriter.replaceOpWithNewOp(op, operandType, exp, one); + auto exp = rewriter.create(loc, adaptor.getOperand(), + expAttrs.getAttrs()); + rewriter.replaceOpWithNewOp( + op, operandType, ValueRange{exp, one}, subAttrs.getAttrs()); return success(); } @@ -142,9 +155,10 @@ struct ExpM1OpLowering : public ConvertOpToLLVMPattern { floatOne); auto one = rewriter.create(loc, llvm1DVectorTy, splatAttr); - auto exp = - rewriter.create(loc, llvm1DVectorTy, operands[0]); - return rewriter.create(loc, llvm1DVectorTy, exp, one); + auto exp = rewriter.create( + loc, llvm1DVectorTy, operands[0], expAttrs.getAttrs()); + return rewriter.create( + loc, llvm1DVectorTy, ValueRange{exp, one}, subAttrs.getAttrs()); }, rewriter); } @@ -166,6 +180,8 @@ struct Log1pOpLowering : public ConvertOpToLLVMPattern { auto resultType = op.getResult().getType(); auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); + ConvertFastMath addAttrs(op); + ConvertFastMath logAttrs(op); if (!operandType.isa()) { LLVM::ConstantOp one = @@ -176,9 +192,11 @@ struct Log1pOpLowering : public ConvertOpToLLVMPattern { floatOne)) 
: rewriter.create(loc, operandType, floatOne); - auto add = rewriter.create(loc, operandType, one, - adaptor.getOperand()); - rewriter.replaceOpWithNewOp(op, operandType, add); + auto add = rewriter.create( + loc, operandType, ValueRange{one, adaptor.getOperand()}, + addAttrs.getAttrs()); + rewriter.replaceOpWithNewOp(op, operandType, ValueRange{add}, + logAttrs.getAttrs()); return success(); } @@ -196,9 +214,11 @@ struct Log1pOpLowering : public ConvertOpToLLVMPattern { floatOne); auto one = rewriter.create(loc, llvm1DVectorTy, splatAttr); - auto add = rewriter.create(loc, llvm1DVectorTy, one, - operands[0]); - return rewriter.create(loc, llvm1DVectorTy, add); + auto add = rewriter.create(loc, llvm1DVectorTy, + ValueRange{one, operands[0]}, + addAttrs.getAttrs()); + return rewriter.create( + loc, llvm1DVectorTy, ValueRange{add}, logAttrs.getAttrs()); }, rewriter); } @@ -220,6 +240,8 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { auto resultType = op.getResult().getType(); auto floatType = getElementTypeOrSelf(resultType).cast(); auto floatOne = rewriter.getFloatAttr(floatType, 1.0); + ConvertFastMath sqrtAttrs(op); + ConvertFastMath divAttrs(op); if (!operandType.isa()) { LLVM::ConstantOp one; @@ -230,8 +252,10 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { } else { one = rewriter.create(loc, operandType, floatOne); } - auto sqrt = rewriter.create(loc, adaptor.getOperand()); - rewriter.replaceOpWithNewOp(op, operandType, one, sqrt); + auto sqrt = rewriter.create(loc, adaptor.getOperand(), + sqrtAttrs.getAttrs()); + rewriter.replaceOpWithNewOp( + op, operandType, ValueRange{one, sqrt}, divAttrs.getAttrs()); return success(); } @@ -249,9 +273,10 @@ struct RsqrtOpLowering : public ConvertOpToLLVMPattern { floatOne); auto one = rewriter.create(loc, llvm1DVectorTy, splatAttr); - auto sqrt = - rewriter.create(loc, llvm1DVectorTy, operands[0]); - return rewriter.create(loc, llvm1DVectorTy, one, sqrt); + auto sqrt = rewriter.create( + loc, 
llvm1DVectorTy, operands[0], sqrtAttrs.getAttrs()); + return rewriter.create( + loc, llvm1DVectorTy, ValueRange{one, sqrt}, divAttrs.getAttrs()); }, rewriter); } diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp index d63b84ccdf856..4685590fa1d32 100644 --- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp +++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp @@ -2115,7 +2115,7 @@ class ExtractStridedMetadataOpLowering return failure(); // Create the descriptor. - MemRefDescriptor sourceMemRef(adaptor.getOperands().front()); + MemRefDescriptor sourceMemRef(adaptor.getSource()); Location loc = extractStridedMetadataOp.getLoc(); Value source = extractStridedMetadataOp.getSource(); @@ -2125,7 +2125,13 @@ class ExtractStridedMetadataOpLowering results.reserve(2 + rank * 2); // Base buffer. - results.push_back(sourceMemRef.allocatedPtr(rewriter, loc)); + Value baseBuffer = sourceMemRef.allocatedPtr(rewriter, loc); + Value alignedBuffer = sourceMemRef.alignedPtr(rewriter, loc); + MemRefDescriptor dstMemRef = MemRefDescriptor::fromStaticShape( + rewriter, loc, *getTypeConverter(), + extractStridedMetadataOp.getBaseBuffer().getType().cast(), + baseBuffer, alignedBuffer); + results.push_back((Value)dstMemRef); // Offset. 
results.push_back(sourceMemRef.offset(rewriter, loc)); diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp index c4c49f2edd5ff..d9f54b8cb55d7 100644 --- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp +++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp @@ -11,8 +11,10 @@ #include "mlir/Conversion/LLVMCommon/ConversionTarget.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/NVVMDialect.h" #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/Pass/Pass.h" namespace mlir { @@ -253,6 +255,23 @@ struct MmaLdMatrixOpToNVVM : public ConvertOpToLLVMPattern { } }; +/// Convert the given type into the corresponding PTX type (NVVM::MMATypes +/// enum). +static FailureOr getNvvmMmaType(Type t) { + Type elType = getElementTypeOrSelf(t); + if (elType.isInteger(8)) + return NVVM::MMATypes::s8; + if (elType.isInteger(4)) + return NVVM::MMATypes::s4; + if (elType.isF16()) + return NVVM::MMATypes::f16; + if (elType.isF64()) + return NVVM::MMATypes::f64; + if (elType.isF32()) + return NVVM::MMATypes::tf32; + return failure(); +} + struct MmaSyncOptoNVVM : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; @@ -262,53 +281,38 @@ struct MmaSyncOptoNVVM : public ConvertOpToLLVMPattern { Location loc = op->getLoc(); // Get the shapes of the MMAMatrix type being used. The shapes will // choose which intrinsic this op will be lowered to. 
- auto aType = op.getMatrixA().getType().cast(); - auto cType = op.getMatrixC().getType().cast(); + VectorType aType = op.getMatrixA().getType(); + VectorType bType = op.getMatrixB().getType(); + VectorType cType = op.getMatrixC().getType(); - int64_t m = op.getMmaShape()[0].cast().getInt(); - int64_t n = op.getMmaShape()[1].cast().getInt(); - int64_t k = op.getMmaShape()[2].cast().getInt(); - std::array gemmShape{m, n, k}; + std::array gemmShape = op.getMmaShapeAsArray(); + + // Tensor Cores (mma.sync) on F32 works only with TensorFloat32 (TF32). + bool tf32Enabled = op->hasAttr(op.getTf32EnabledAttrName()); + if (aType.getElementType().isF32() && !tf32Enabled) + return failure(); - NVVM::MMATypes ptxTypeA; - NVVM::MMATypes ptxTypeB; + FailureOr ptxTypeA = getNvvmMmaType(aType); + if (failed(ptxTypeA)) + return op->emitOpError("failed to deduce operand PTX types"); + FailureOr ptxTypeB = getNvvmMmaType(bType); + if (failed(ptxTypeB)) + return op->emitOpError("failed to deduce operand PTX types"); Optional ptxTypeC = NVVM::MmaOp::inferOperandMMAType( cType.getElementType(), /*isAccumulator=*/true); if (!ptxTypeC) return op->emitError( "could not infer the PTX type for the accumulator/result"); - // Tensor Cores (mma.sync) on F32 works only with TensorFloat32 (TF32). - bool tf32Enabled = op->hasAttr(op.getTf32EnabledAttrName()); - if (aType.getElementType().isF32() && !tf32Enabled) - return failure(); - + // TODO: add an attribute to the op to customize this behavior.
Optional overflow(llvm::None); - if (aType.getElementType().isInteger(8)) { - ptxTypeA = NVVM::MMATypes::s8; - ptxTypeB = NVVM::MMATypes::s8; + if (aType.getElementType().isa()) overflow = NVVM::MMAIntOverflow::satfinite; - } else if (aType.getElementType().isInteger(4)) { - ptxTypeA = NVVM::MMATypes::s4; - ptxTypeB = NVVM::MMATypes::s4; - overflow = NVVM::MMAIntOverflow::satfinite; - } else if (aType.getElementType().isF16()) { - ptxTypeA = NVVM::MMATypes::f16; - ptxTypeB = NVVM::MMATypes::f16; - } else if (aType.getElementType().isF64()) { - ptxTypeA = NVVM::MMATypes::f64; - ptxTypeB = NVVM::MMATypes::f64; - } else if (aType.getElementType().isF32()) { - ptxTypeA = NVVM::MMATypes::tf32; - ptxTypeB = NVVM::MMATypes::tf32; - } else { - return op->emitError("could not deduce operand PTX types"); - } SmallVector matA = - unpackOperandVector(rewriter, loc, adaptor.getMatrixA(), ptxTypeA); + unpackOperandVector(rewriter, loc, adaptor.getMatrixA(), *ptxTypeA); SmallVector matB = - unpackOperandVector(rewriter, loc, adaptor.getMatrixB(), ptxTypeB); + unpackOperandVector(rewriter, loc, adaptor.getMatrixB(), *ptxTypeB); SmallVector matC = unpackOperandVector(rewriter, loc, adaptor.getMatrixC(), *ptxTypeC); @@ -321,7 +325,7 @@ struct MmaSyncOptoNVVM : public ConvertOpToLLVMPattern { /*b1Op=*/llvm::None, /*intOverflow=*/overflow, /*multiplicandPtxTypes=*/ - std::array{ptxTypeA, ptxTypeB}, + std::array{*ptxTypeA, *ptxTypeB}, /*multiplicandLayouts=*/ std::array{NVVM::MMALayout::row, NVVM::MMALayout::col}); @@ -376,13 +380,182 @@ static void emitCpAsyncOpZfillAsm(Location loc, Value dstPtr, Value srcPtr, SmallVector asmVals{dstPtr, srcPtr, dstBytes, srcBytes}; rewriter.create( - loc, LLVM::LLVMVoidType::get(rewriter.getContext()), /*operands=*/asmVals, + loc, LLVM::LLVMVoidType::get(rewriter.getContext()), + /*operands=*/asmVals, /*asm_string=*/asmStr, /*constraints=*/asmConstraints, /*has_side_effects=*/true, /*is_align_stack=*/false, /*asm_dialect=*/asmDialectAttr, 
/*operand_attrs=*/ArrayAttr()); } +/// Returns the constraints for the sparse MMA inline assembly instruction. +static std::string buildMmaSparseAsmConstraintString(unsigned matASize, + unsigned matBSize, + unsigned matCSize) { + std::string str; + llvm::raw_string_ostream ss(str); + for (unsigned i = 0; i < matCSize; i++) + ss << "=r,"; + for (unsigned i = 0; i < matASize + matBSize + matCSize; i++) + ss << "r,"; + // The final two operands are for the sparsity metadata and sparsity selector. + ss << "r,r"; + ss.flush(); + return str; +} + +/// Returns the string for the `mma.sp.sync` instruction that corresponds to +/// the given parameters. Note that this function doesn't do any validation, +/// it's expected that the provided parameters correspond to a valid +/// instruction. +static std::string +buildMmaSparseAsmString(const std::array &shape, unsigned matASize, + unsigned matBSize, unsigned matCSize, + NVVM::MMATypes ptxTypeA, NVVM::MMATypes ptxTypeB, + NVVM::MMATypes ptxTypeC, NVVM::MMATypes ptxTypeD, + Optional overflow) { + auto ptxTypeStr = [](NVVM::MMATypes ptxType) { + return NVVM::stringifyMMATypes(ptxType); + }; + + std::string asmStr; + llvm::raw_string_ostream ss(asmStr); + ss << "mma.sp.sync.aligned.m" << shape[0] << "n" << shape[1] << "k" + << shape[2] << ".row.col."; + + if (overflow) + ss << NVVM::stringifyMMAIntOverflow(*overflow) << "."; + + ss << ptxTypeStr(ptxTypeD) << "." << ptxTypeStr(ptxTypeA) << "." + << ptxTypeStr(ptxTypeB) << "." << ptxTypeStr(ptxTypeC) << " "; + unsigned asmArgIdx = 0; + + // The operand string is structured into sections `{matC elements...}, + // {matA elements...}, {matB elements...}, {matC elements}`. + for (const auto arrSize : {matCSize, matASize, matBSize, matCSize}) { + ss << "{"; + for (unsigned i = 0; i < arrSize; i++) + ss << "$" << asmArgIdx++ << (i < arrSize - 1 ?
"," : ""); + ss << "},"; + } + ss << "$" << asmArgIdx++ << ",$" << asmArgIdx++ << ";"; + ss.flush(); + return asmStr; +} + +/// Builds an inline assembly operation corresponding to the specified MMA +/// sparse sync operation. +static FailureOr emitMmaSparseSyncOpAsm( + Location loc, NVVM::MMATypes ptxTypeA, NVVM::MMATypes ptxTypeB, + NVVM::MMATypes ptxTypeC, NVVM::MMATypes ptxTypeD, + Optional overflow, ArrayRef unpackedAData, + ArrayRef unpackedB, ArrayRef unpackedC, Value indexData, + int64_t metadataSelector, const std::array &shape, + Type intrinsicResultType, ConversionPatternRewriter &rewriter) { + auto asmDialectAttr = LLVM::AsmDialectAttr::get(rewriter.getContext(), + LLVM::AsmDialect::AD_ATT); + + std::string asmStr = buildMmaSparseAsmString( + shape, unpackedAData.size(), unpackedB.size(), unpackedC.size(), ptxTypeA, + ptxTypeB, ptxTypeC, ptxTypeD, overflow); + std::string constraintStr = buildMmaSparseAsmConstraintString( + unpackedAData.size(), unpackedB.size(), unpackedC.size()); + + Value selectorVal = rewriter.create( + loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(metadataSelector)); + + SmallVector asmVals; + asmVals.reserve(unpackedAData.size() + unpackedB.size() + unpackedC.size() + + 2); + for (ArrayRef args : {unpackedAData, unpackedB, unpackedC}) + llvm::append_range(asmVals, args); + asmVals.push_back(indexData); + asmVals.push_back(selectorVal); + + return rewriter.create(loc, + /*resultTypes=*/intrinsicResultType, + /*operands=*/asmVals, + /*asm_string=*/asmStr, + /*constraints=*/constraintStr, + /*has_side_effects=*/true, + /*is_align_stack=*/false, + /*asm_dialect=*/asmDialectAttr, + /*operand_attrs=*/ArrayAttr()); +} + +/// Lowers `nvgpu.mma.sp.sync` to inline assembly. 
+struct NVGPUMmaSparseSyncLowering + : public ConvertOpToLLVMPattern { + using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(nvgpu::MmaSparseSyncOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op->getLoc(); + // Get the shapes of the MMAMatrix type being used. The shapes will + // choose which intrinsic this op will be lowered to. + VectorType aType = op.getMatrixA().getType(); + VectorType bType = op.getMatrixB().getType(); + VectorType cType = op.getMatrixC().getType(); + + FailureOr ptxTypeA = getNvvmMmaType(aType); + if (failed(ptxTypeA)) + return op->emitOpError("failed to deduce operand PTX types"); + FailureOr ptxTypeB = getNvvmMmaType(bType); + if (failed(ptxTypeB)) + return op->emitOpError("failed to deduce operand PTX types"); + Optional ptxTypeC = NVVM::MmaOp::inferOperandMMAType( + cType.getElementType(), /*isAccumulator=*/true); + if (!ptxTypeC) + return op->emitError( + "could not infer the PTX type for the accumulator/result"); + + // Same as `mma.sync`, F32 works only with TensorFloat32 (TF32). + bool tf32Enabled = op->hasAttr(op.getTf32EnabledAttrName()); + if (aType.getElementType().isF32() && !tf32Enabled) + return failure(); + + // TODO: add an attribute to the op to customize this behavior. + Optional overflow(llvm::None); + if (aType.getElementType().isa()) + overflow = NVVM::MMAIntOverflow::satfinite; + + SmallVector matA = + unpackOperandVector(rewriter, loc, adaptor.getMatrixA(), *ptxTypeA); + SmallVector matB = + unpackOperandVector(rewriter, loc, adaptor.getMatrixB(), *ptxTypeB); + SmallVector matC = + unpackOperandVector(rewriter, loc, adaptor.getMatrixC(), *ptxTypeC); + + Type desiredRetTy = typeConverter->convertType(op->getResultTypes()[0]); + Type intrinsicResTy = inferIntrinsicResultType( + typeConverter->convertType(op->getResultTypes()[0])); + + // Bitcast the sparse metadata from vector<2xi16> to an i32.
+ Value sparseMetadata = adaptor.getSparseMetadata(); + if (sparseMetadata.getType() != + LLVM::getFixedVectorType(rewriter.getI16Type(), 2)) + return op->emitOpError() << "Expected metadata type to be LLVM " + "VectorType of 2 i16 elements"; + sparseMetadata = rewriter.create( + loc, rewriter.getI32Type(), sparseMetadata); + + FailureOr intrinsicResult = emitMmaSparseSyncOpAsm( + loc, *ptxTypeA, *ptxTypeB, *ptxTypeC, *ptxTypeC, overflow, matA, matB, + matC, sparseMetadata, op.getSparsitySelector(), op.getMmaShapeAsArray(), + intrinsicResTy, rewriter); + if (failed(intrinsicResult)) + return failure(); + + assert((*intrinsicResult).getNumResults() == 1 && + "expected inline asm op returns a single LLVM struct type"); + rewriter.replaceOp( + op, convertIntrinsicResult(op.getLoc(), intrinsicResTy, desiredRetTy, + (*intrinsicResult)->getResult(0), rewriter)); + return success(); + } +}; + struct NVGPUAsyncCopyLowering : public ConvertOpToLLVMPattern { using ConvertOpToLLVMPattern< @@ -488,8 +661,8 @@ struct NVGPUAsyncWaitLowering void mlir::populateNVGPUToNVVMConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns) { patterns.add( - converter); + NVGPUAsyncCreateGroupLowering, NVGPUAsyncWaitLowering, + NVGPUMmaSparseSyncLowering>(converter); } std::unique_ptr mlir::createConvertNVGPUToNVVMPass() { diff --git a/mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp b/mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp index 6b1145c464787..313172614268d 100644 --- a/mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp +++ b/mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp @@ -38,7 +38,7 @@ class ConvertTensorToSPIRVPass SPIRVConversionTarget::get(targetAttr); SPIRVConversionOptions options; - options.emulateNon32BitScalarTypes = this->emulateNon32BitScalarTypes; + options.emulateLT32BitScalarTypes = this->emulateLT32BitScalarTypes; SPIRVTypeConverter typeConverter(targetAttr, options); RewritePatternSet patterns(context); diff --git 
a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp index 01654fdd6024a..b64b0d88a3e37 100644 --- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp +++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp @@ -212,7 +212,7 @@ extractStridedSliceSupportsMMAMatrixType(vector::ExtractStridedSliceOp op) { if (warpMatrixInfo->operandRole == nvgpu::MatMulOperandRole::B) return (op->getResult(0).getType().cast() == (*contractOp).getRhs().getType().cast()); - else if (warpMatrixInfo->operandRole == nvgpu::MatMulOperandRole::C) + if (warpMatrixInfo->operandRole == nvgpu::MatMulOperandRole::C) return (op->getResult(0).getType().cast() == (*contractOp).getAcc().getType().cast()); @@ -768,7 +768,7 @@ convertExtractStridedSlice(vector::ExtractStridedSliceOp op, if (offsets[0] && offsets[1]) return op->emitError() << "Slicing fragments in 2D is not supported. "; - else if (offsets[0]) + if (offsets[0]) sliceOffset[0] = (warpVectorShape[0] / offsets[0]); else if (offsets[1]) sliceOffset[0] = (warpVectorShape[1] / offsets[1]); diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 5693ad1c0e8d1..2c0fc51d08a40 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -19,35 +19,11 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace mlir::arith; -//===----------------------------------------------------------------------===// -// Floating point op parse/print helpers -//===----------------------------------------------------------------------===// -static ParseResult parseArithFastMathAttr(OpAsmParser &parser, - Attribute &attr) { - if (succeeded( - parser.parseOptionalKeyword(FastMathFlagsAttr::getMnemonic()))) { - attr = FastMathFlagsAttr::parse(parser, Type{}); - return success(static_cast(attr)); - } else { - // No fastmath attribute mnemonic present - defer 
attribute creation and use - // the default value. - return success(); - } -} - -static void printArithFastMathAttr(OpAsmPrinter &printer, Operation *op, - FastMathFlagsAttr fmAttr) { - // Elide printing the fastmath attribute when fastmath=none - if (fmAttr && (fmAttr.getValue() != FastMathFlags::none)) { - printer << " " << FastMathFlagsAttr::getMnemonic(); - fmAttr.print(printer); - } -} - //===----------------------------------------------------------------------===// // Pattern helpers //===----------------------------------------------------------------------===// @@ -1469,6 +1445,16 @@ static Attribute getBoolAttribute(Type type, MLIRContext *ctx, bool value) { return DenseElementsAttr::get(shapedType, boolAttr); } +static Optional getIntegerWidth(Type t) { + if (auto intType = t.dyn_cast()) { + return intType.getWidth(); + } + if (auto vectorIntType = t.dyn_cast()) { + return vectorIntType.getElementType().cast().getWidth(); + } + return llvm::None; +} + OpFoldResult arith::CmpIOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "cmpi takes two operands"); @@ -1481,13 +1467,17 @@ OpFoldResult arith::CmpIOp::fold(ArrayRef operands) { if (matchPattern(getRhs(), m_Zero())) { if (auto extOp = getLhs().getDefiningOp()) { // extsi(%x : i1 -> iN) != 0 -> %x - if (extOp.getOperand().getType().cast().getWidth() == 1 && + Optional integerWidth = + getIntegerWidth(extOp.getOperand().getType()); + if (integerWidth && integerWidth.value() == 1 && getPredicate() == arith::CmpIPredicate::ne) return extOp.getOperand(); } if (auto extOp = getLhs().getDefiningOp()) { // extui(%x : i1 -> iN) != 0 -> %x - if (extOp.getOperand().getType().cast().getWidth() == 1 && + Optional integerWidth = + getIntegerWidth(extOp.getOperand().getType()); + if (integerWidth && integerWidth.value() == 1 && getPredicate() == arith::CmpIPredicate::ne) return extOp.getOperand(); } diff --git a/mlir/lib/Dialect/Async/IR/Async.cpp b/mlir/lib/Dialect/Async/IR/Async.cpp index 
4b5d6a1d78fe1..fbbb7357610e5 100644 --- a/mlir/lib/Dialect/Async/IR/Async.cpp +++ b/mlir/lib/Dialect/Async/IR/Async.cpp @@ -8,7 +8,10 @@ #include "mlir/Dialect/Async/IR/Async.h" +#include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/DialectImplementation.h" +#include "mlir/IR/FunctionImplementation.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; @@ -320,6 +323,134 @@ LogicalResult AwaitOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// FuncOp +//===----------------------------------------------------------------------===// + +void FuncOp::build(OpBuilder &builder, OperationState &state, StringRef name, + FunctionType type, ArrayRef attrs, + ArrayRef argAttrs) { + state.addAttribute(SymbolTable::getSymbolAttrName(), + builder.getStringAttr(name)); + state.addAttribute(FunctionOpInterface::getTypeAttrName(), + TypeAttr::get(type)); + + state.attributes.append(attrs.begin(), attrs.end()); + state.addRegion(); + + if (argAttrs.empty()) + return; + assert(type.getNumInputs() == argAttrs.size()); + function_interface_impl::addArgAndResultAttrs(builder, state, argAttrs, + /*resultAttrs=*/llvm::None); +} + +ParseResult FuncOp::parse(OpAsmParser &parser, OperationState &result) { + auto buildFuncType = + [](Builder &builder, ArrayRef argTypes, ArrayRef results, + function_interface_impl::VariadicFlag, + std::string &) { return builder.getFunctionType(argTypes, results); }; + + return function_interface_impl::parseFunctionOp( + parser, result, /*allowVariadic=*/false, buildFuncType); +} + +void FuncOp::print(OpAsmPrinter &p) { + function_interface_impl::printFunctionOp(p, *this, /*isVariadic=*/false); +} + +/// Check that the result type of async.func is not void and must be +/// some async token or async values. 
+LogicalResult FuncOp::verify() { + auto resultTypes = getResultTypes(); + if (resultTypes.empty()) + return emitOpError() + << "result is expected to be at least of size 1, but got " + << resultTypes.size(); + + for (unsigned i = 0, e = resultTypes.size(); i != e; ++i) { + auto type = resultTypes[i]; + if (!type.isa() && !type.isa()) + return emitOpError() << "result type must be async value type or async " + "token type, but got " + << type; + // We only allow AsyncToken appear as the first return value + if (type.isa() && i != 0) { + return emitOpError() + << " results' (optional) async token type is expected " + "to appear as the 1st return value, but got " + << i + 1; + } + } + + return success(); +} + +//===----------------------------------------------------------------------===// +/// CallOp +//===----------------------------------------------------------------------===// + +LogicalResult CallOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + // Check that the callee attribute was specified. + auto fnAttr = (*this)->getAttrOfType("callee"); + if (!fnAttr) + return emitOpError("requires a 'callee' symbol reference attribute"); + FuncOp fn = symbolTable.lookupNearestSymbolFrom(*this, fnAttr); + if (!fn) + return emitOpError() << "'" << fnAttr.getValue() + << "' does not reference a valid async function"; + + // Verify that the operand and result types match the callee. 
+ auto fnType = fn.getFunctionType(); + if (fnType.getNumInputs() != getNumOperands()) + return emitOpError("incorrect number of operands for callee"); + + for (unsigned i = 0, e = fnType.getNumInputs(); i != e; ++i) + if (getOperand(i).getType() != fnType.getInput(i)) + return emitOpError("operand type mismatch: expected operand type ") + << fnType.getInput(i) << ", but provided " + << getOperand(i).getType() << " for operand number " << i; + + if (fnType.getNumResults() != getNumResults()) + return emitOpError("incorrect number of results for callee"); + + for (unsigned i = 0, e = fnType.getNumResults(); i != e; ++i) + if (getResult(i).getType() != fnType.getResult(i)) { + auto diag = emitOpError("result type mismatch at index ") << i; + diag.attachNote() << " op result types: " << getResultTypes(); + diag.attachNote() << "function result types: " << fnType.getResults(); + return diag; + } + + return success(); +} + +FunctionType CallOp::getCalleeType() { + return FunctionType::get(getContext(), getOperandTypes(), getResultTypes()); +} + +//===----------------------------------------------------------------------===// +/// ReturnOp +//===----------------------------------------------------------------------===// + +LogicalResult ReturnOp::verify() { + auto funcOp = (*this)->getParentOfType(); + ArrayRef resultTypes = funcOp.isStateful() + ? funcOp.getResultTypes().drop_front() + : funcOp.getResultTypes(); + // Get the underlying value types from async types returned from the + // parent `async.func` operation. 
+ auto types = llvm::map_range(resultTypes, [](const Type &result) { + return result.cast().getValueType(); + }); + + if (getOperandTypes() != types) + return emitOpError("operand types do not match the types returned from " + "the parent FuncOp"); + + return success(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp index b4880c0e3b3f5..66c5b731b6e76 100644 --- a/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp +++ b/mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp @@ -51,10 +51,6 @@ class AsyncToAsyncRuntimePass } // namespace -//===----------------------------------------------------------------------===// -// async.execute op outlining to the coroutine functions. -//===----------------------------------------------------------------------===// - /// Function targeted for coroutine transformation has two additional blocks at /// the end: coroutine cleanup and coroutine suspension. /// @@ -64,6 +60,12 @@ namespace { struct CoroMachinery { func::FuncOp func; + // Async function returns an optional token, followed by some async values + // + // async.func @foo() -> !async.value { + // %cst = arith.constant 42.0 : T + // return %cst: T + // } // Async execute region returns a completion token, and an async value for // each yielded value. // @@ -71,12 +73,12 @@ struct CoroMachinery { // %0 = arith.constant ... 
: T // async.yield %0 : T // } - Value asyncToken; // token representing completion of the async region + Optional asyncToken; // returned completion token llvm::SmallVector returnValues; // returned async values Value coroHandle; // coroutine handle (!async.coro.getHandle value) Block *entry; // coroutine entry block - Block *setError; // switch completion token and all values to error state + Optional setError; // set returned values to error state Block *cleanup; // coroutine cleanup block Block *suspend; // coroutine suspension block }; @@ -87,13 +89,9 @@ struct CoroMachinery { /// `async.runtime.*` and `async.coro.*` operations. Adds a new entry block /// that branches into preexisting entry block. Also inserts trailing blocks. /// -/// The result types of the passed `func` must start with an `async.token` +/// The result types of the passed `func` start with an optional `async.token` /// and be continued with some number of `async.value`s. /// -/// The func given to this function needs to have been preprocessed to have -/// either branch or yield ops as terminators. Branches to the cleanup block are -/// inserted after each yield. -/// /// See LLVM coroutines documentation: https://llvm.org/docs/Coroutines.html /// /// - `entry` block sets up the coroutine. @@ -110,7 +108,7 @@ struct CoroMachinery { /// ^entry(): /// %token = : !async.token // create async runtime token /// %value = : !async.value // create async value -/// %id = async.coro.getId // create a coroutine id +/// %id = async.coro.getId // create a coroutine id /// %hdl = async.coro.begin %id // create a coroutine handle /// cf.br ^preexisting_entry_block /// @@ -142,11 +140,20 @@ static CoroMachinery setupCoroMachinery(func::FuncOp func) { // ------------------------------------------------------------------------ // // Allocate async token/values that we will return from a ramp function. 
// ------------------------------------------------------------------------ // - auto retToken = - builder.create(TokenType::get(ctx)).getResult(); + + // We treat TokenType as state update marker to represent side-effects of + // async computations + bool isStateful = func.getCallableResults().front().isa(); + + Optional retToken; + if (isStateful) + retToken.emplace(builder.create(TokenType::get(ctx))); llvm::SmallVector retValues; - for (auto resType : func.getCallableResults().drop_front()) + ArrayRef resValueTypes = isStateful + ? func.getCallableResults().drop_front() + : func.getCallableResults(); + for (auto resType : resValueTypes) retValues.emplace_back( builder.create(resType).getResult()); @@ -179,26 +186,17 @@ static CoroMachinery setupCoroMachinery(func::FuncOp func) { // Mark the end of a coroutine: async.coro.end builder.create(coroHdlOp.getHandle()); - // Return created `async.token` and `async.values` from the suspend block. - // This will be the return value of a coroutine ramp function. - SmallVector ret{retToken}; + // Return created optional `async.token` and `async.values` from the suspend + // block. This will be the return value of a coroutine ramp function. + SmallVector ret; + if (retToken) + ret.push_back(*retToken); ret.insert(ret.end(), retValues.begin(), retValues.end()); builder.create(ret); // `async.await` op lowering will create resume blocks for async // continuations, and will conditionally branch to cleanup or suspend blocks. - for (Block &block : func.getBody().getBlocks()) { - if (&block == entryBlock || &block == cleanupBlock || - &block == suspendBlock) - continue; - Operation *terminator = block.getTerminator(); - if (auto yield = dyn_cast(terminator)) { - builder.setInsertionPointToEnd(&block); - builder.create(cleanupBlock); - } - } - // The switch-resumed API based coroutine should be marked with // coroutine.presplit attribute to mark the function as a coroutine. 
func->setAttr("passthrough", builder.getArrayAttr( @@ -210,7 +208,7 @@ static CoroMachinery setupCoroMachinery(func::FuncOp func) { machinery.returnValues = retValues; machinery.coroHandle = coroHdlOp.getHandle(); machinery.entry = entryBlock; - machinery.setError = nullptr; // created lazily only if needed + machinery.setError = None; // created lazily only if needed machinery.cleanup = cleanupBlock; machinery.suspend = suspendBlock; return machinery; @@ -220,25 +218,31 @@ static CoroMachinery setupCoroMachinery(func::FuncOp func) { // runtime operations (see for example lowering of assert operation). static Block *setupSetErrorBlock(CoroMachinery &coro) { if (coro.setError) - return coro.setError; + return *coro.setError; coro.setError = coro.func.addBlock(); - coro.setError->moveBefore(coro.cleanup); + (*coro.setError)->moveBefore(coro.cleanup); auto builder = - ImplicitLocOpBuilder::atBlockBegin(coro.func->getLoc(), coro.setError); + ImplicitLocOpBuilder::atBlockBegin(coro.func->getLoc(), *coro.setError); // Coroutine set_error block: set error on token and all returned values. - builder.create(coro.asyncToken); + if (coro.asyncToken) + builder.create(*coro.asyncToken); + for (Value retValue : coro.returnValues) builder.create(retValue); // Branch into the cleanup block. builder.create(coro.cleanup); - return coro.setError; + return *coro.setError; } +//===----------------------------------------------------------------------===// +// async.execute op outlining to the coroutine functions. +//===----------------------------------------------------------------------===// + /// Outline the body region attached to the `async.execute` op into a standalone /// function. 
/// @@ -382,6 +386,118 @@ class AddToGroupOpLowering : public OpConversionPattern { }; } // namespace +//===----------------------------------------------------------------------===// +// Convert async.func, async.return and async.call operations to non-blocking +// operations based on llvm coroutine +//===----------------------------------------------------------------------===// + +namespace { + +//===----------------------------------------------------------------------===// +// Convert async.func operation to func.func +//===----------------------------------------------------------------------===// + +class AsyncFuncOpLowering : public OpConversionPattern { +public: + AsyncFuncOpLowering(MLIRContext *ctx, + llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} + + LogicalResult + matchAndRewrite(async::FuncOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op->getLoc(); + + auto newFuncOp = + rewriter.create(loc, op.getName(), op.getFunctionType()); + + SymbolTable::setSymbolVisibility(newFuncOp, + SymbolTable::getSymbolVisibility(op)); + // Copy over all attributes other than the name. 
+ for (const auto &namedAttr : op->getAttrs()) { + if (namedAttr.getName() != SymbolTable::getSymbolAttrName()) + newFuncOp->setAttr(namedAttr.getName(), namedAttr.getValue()); + } + + rewriter.inlineRegionBefore(op.getBody(), newFuncOp.getBody(), + newFuncOp.end()); + + CoroMachinery coro = setupCoroMachinery(newFuncOp); + coros[newFuncOp] = coro; + // no initial suspend, we should hot-start + + rewriter.eraseOp(op); + return success(); + } + +private: + llvm::DenseMap &coros; +}; + +//===----------------------------------------------------------------------===// +// Convert async.call operation to func.call +//===----------------------------------------------------------------------===// + +class AsyncCallOpLowering : public OpConversionPattern { +public: + AsyncCallOpLowering(MLIRContext *ctx) + : OpConversionPattern(ctx) {} + + LogicalResult + matchAndRewrite(async::CallOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp( + op, op.getCallee(), op.getResultTypes(), op.getOperands()); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// Convert async.return operation to async.runtime operations. 
+//===----------------------------------------------------------------------===// + +class AsyncReturnOpLowering : public OpConversionPattern { +public: + AsyncReturnOpLowering(MLIRContext *ctx, + llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} + + LogicalResult + matchAndRewrite(async::ReturnOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto func = op->template getParentOfType(); + auto funcCoro = coros.find(func); + if (funcCoro == coros.end()) + return rewriter.notifyMatchFailure( + op, "operation is not inside the async coroutine function"); + + Location loc = op->getLoc(); + const CoroMachinery &coro = funcCoro->getSecond(); + rewriter.setInsertionPointAfter(op); + + // Store return values into the async values storage and switch async + // values state to available. + for (auto tuple : llvm::zip(adaptor.getOperands(), coro.returnValues)) { + Value returnValue = std::get<0>(tuple); + Value asyncValue = std::get<1>(tuple); + rewriter.create(loc, returnValue, asyncValue); + rewriter.create(loc, asyncValue); + } + + if (coro.asyncToken) + // Switch the coroutine completion token to available state. + rewriter.create(loc, *coro.asyncToken); + + rewriter.eraseOp(op); + rewriter.create(loc, coro.cleanup); + return success(); + } + +private: + llvm::DenseMap &coros; +}; +} // namespace + //===----------------------------------------------------------------------===// // Convert async.await and async.await_all operations to the async.runtime.await // or async.runtime.await_and_resume operations. 
@@ -393,11 +509,9 @@ class AwaitOpLoweringBase : public OpConversionPattern { using AwaitAdaptor = typename AwaitType::Adaptor; public: - AwaitOpLoweringBase( - MLIRContext *ctx, - llvm::DenseMap &outlinedFunctions) - : OpConversionPattern(ctx), - outlinedFunctions(outlinedFunctions) {} + AwaitOpLoweringBase(MLIRContext *ctx, + llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} LogicalResult matchAndRewrite(AwaitType op, typename AwaitType::Adaptor adaptor, @@ -409,8 +523,8 @@ class AwaitOpLoweringBase : public OpConversionPattern { // Check if await operation is inside the outlined coroutine function. auto func = op->template getParentOfType(); - auto outlined = outlinedFunctions.find(func); - const bool isInCoroutine = outlined != outlinedFunctions.end(); + auto funcCoro = coros.find(func); + const bool isInCoroutine = funcCoro != coros.end(); Location loc = op->getLoc(); Value operand = adaptor.getOperand(); @@ -436,7 +550,7 @@ class AwaitOpLoweringBase : public OpConversionPattern { // Inside the coroutine we convert await operation into coroutine suspension // point, and resume execution asynchronously. if (isInCoroutine) { - CoroMachinery &coro = outlined->getSecond(); + CoroMachinery &coro = funcCoro->getSecond(); Block *suspended = op->getBlock(); ImplicitLocOpBuilder builder(loc, op, rewriter.getListener()); @@ -488,7 +602,7 @@ class AwaitOpLoweringBase : public OpConversionPattern { } private: - llvm::DenseMap &outlinedFunctions; + llvm::DenseMap &coros; }; /// Lowering for `async.await` with a token operand. 
@@ -531,24 +645,22 @@ class AwaitAllOpLowering : public AwaitOpLoweringBase { class YieldOpLowering : public OpConversionPattern { public: - YieldOpLowering( - MLIRContext *ctx, - const llvm::DenseMap &outlinedFunctions) - : OpConversionPattern(ctx), - outlinedFunctions(outlinedFunctions) {} + YieldOpLowering(MLIRContext *ctx, + const llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} LogicalResult matchAndRewrite(async::YieldOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // Check if yield operation is inside the async coroutine function. auto func = op->template getParentOfType(); - auto outlined = outlinedFunctions.find(func); - if (outlined == outlinedFunctions.end()) + auto funcCoro = coros.find(func); + if (funcCoro == coros.end()) return rewriter.notifyMatchFailure( op, "operation is not inside the async coroutine function"); Location loc = op->getLoc(); - const CoroMachinery &coro = outlined->getSecond(); + const CoroMachinery &coro = funcCoro->getSecond(); // Store yielded values into the async values storage and switch async // values state to available. @@ -559,14 +671,18 @@ class YieldOpLowering : public OpConversionPattern { rewriter.create(loc, asyncValue); } - // Switch the coroutine completion token to available state. - rewriter.replaceOpWithNewOp(op, coro.asyncToken); + if (coro.asyncToken) + // Switch the coroutine completion token to available state. 
+ rewriter.create(loc, *coro.asyncToken); + + rewriter.eraseOp(op); + rewriter.create(loc, coro.cleanup); return success(); } private: - const llvm::DenseMap &outlinedFunctions; + const llvm::DenseMap &coros; }; //===----------------------------------------------------------------------===// @@ -575,24 +691,22 @@ class YieldOpLowering : public OpConversionPattern { class AssertOpLowering : public OpConversionPattern { public: - AssertOpLowering( - MLIRContext *ctx, - llvm::DenseMap &outlinedFunctions) - : OpConversionPattern(ctx), - outlinedFunctions(outlinedFunctions) {} + AssertOpLowering(MLIRContext *ctx, + llvm::DenseMap &coros) + : OpConversionPattern(ctx), coros(coros) {} LogicalResult matchAndRewrite(cf::AssertOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { // Check if assert operation is inside the async coroutine function. auto func = op->template getParentOfType(); - auto outlined = outlinedFunctions.find(func); - if (outlined == outlinedFunctions.end()) + auto funcCoro = coros.find(func); + if (funcCoro == coros.end()) return rewriter.notifyMatchFailure( op, "operation is not inside the async coroutine function"); Location loc = op->getLoc(); - CoroMachinery &coro = outlined->getSecond(); + CoroMachinery &coro = funcCoro->getSecond(); Block *cont = rewriter.splitBlock(op->getBlock(), Block::iterator(op)); rewriter.setInsertionPointToEnd(cont->getPrevNode()); @@ -607,169 +721,33 @@ class AssertOpLowering : public OpConversionPattern { } private: - llvm::DenseMap &outlinedFunctions; + llvm::DenseMap &coros; }; -//===----------------------------------------------------------------------===// - -/// Rewrite a func as a coroutine by: -/// 1) Wrapping the results into `async.value`. -/// 2) Prepending the results with `async.token`. -/// 3) Setting up coroutine blocks. -/// 4) Rewriting return ops as yield op and branch op into the suspend block. 
-static CoroMachinery rewriteFuncAsCoroutine(func::FuncOp func) { - auto *ctx = func->getContext(); - auto loc = func.getLoc(); - SmallVector resultTypes; - resultTypes.reserve(func.getCallableResults().size()); - llvm::transform(func.getCallableResults(), std::back_inserter(resultTypes), - [](Type type) { return ValueType::get(type); }); - func.setType( - FunctionType::get(ctx, func.getFunctionType().getInputs(), resultTypes)); - func.insertResult(0, TokenType::get(ctx), {}); - for (Block &block : func.getBlocks()) { - Operation *terminator = block.getTerminator(); - if (auto returnOp = dyn_cast(*terminator)) { - ImplicitLocOpBuilder builder(loc, returnOp); - builder.create(returnOp.getOperands()); - returnOp.erase(); - } - } - return setupCoroMachinery(func); -} - -/// Rewrites a call into a function that has been rewritten as a coroutine. -/// -/// The invocation of this function is safe only when call ops are traversed in -/// reverse order of how they appear in a single block. See `funcsToCoroutines`. -static void rewriteCallsiteForCoroutine(func::CallOp oldCall, - func::FuncOp func) { - auto loc = func.getLoc(); - ImplicitLocOpBuilder callBuilder(loc, oldCall); - auto newCall = callBuilder.create( - func.getName(), func.getCallableResults(), oldCall.getArgOperands()); - - // Await on the async token and all the value results and unwrap the latter. - callBuilder.create(loc, newCall.getResults().front()); - SmallVector unwrappedResults; - unwrappedResults.reserve(newCall->getResults().size() - 1); - for (Value result : newCall.getResults().drop_front()) - unwrappedResults.push_back( - callBuilder.create(loc, result).getResult()); - // Careful, when result of a call is piped into another call this could lead - // to a dangling pointer. 
- oldCall.replaceAllUsesWith(unwrappedResults); - oldCall.erase(); -} - -static bool isAllowedToBlock(func::FuncOp func) { - return !!func->getAttrOfType(AsyncDialect::kAllowedToBlockAttrName); -} - -static LogicalResult funcsToCoroutines( - ModuleOp module, - llvm::DenseMap &outlinedFunctions) { - // The following code supports the general case when 2 functions mutually - // recurse into each other. Because of this and that we are relying on - // SymbolUserMap to find pointers to calling FuncOps, we cannot simply erase - // a FuncOp while inserting an equivalent coroutine, because that could lead - // to dangling pointers. - - SmallVector funcWorklist; - - // Careful, it's okay to add a func to the worklist multiple times if and only - // if the loop processing the worklist will skip the functions that have - // already been converted to coroutines. - auto addToWorklist = [&](func::FuncOp func) { - if (isAllowedToBlock(func)) - return; - // N.B. To refactor this code into a separate pass the lookup in - // outlinedFunctions is the most obvious obstacle. Looking at an arbitrary - // func and recognizing if it has a coroutine structure is messy. Passing - // this dict between the passes is ugly. - if (isAllowedToBlock(func) || - outlinedFunctions.find(func) == outlinedFunctions.end()) { - for (Operation &op : func.getBody().getOps()) { - if (isa(op)) { - funcWorklist.push_back(func); - break; - } - } - } - }; - - // Traverse in post-order collecting for each func op the await ops it has. - for (func::FuncOp func : module.getOps()) - addToWorklist(func); - - SymbolTableCollection symbolTable; - SymbolUserMap symbolUserMap(symbolTable, module); - - // Rewrite funcs, while updating call sites and adding them to the worklist. 
- while (!funcWorklist.empty()) { - auto func = funcWorklist.pop_back_val(); - auto insertion = outlinedFunctions.insert({func, CoroMachinery{}}); - if (!insertion.second) - // This function has already been processed because this is either - // the corecursive case, or a caller with multiple calls to a newly - // created corouting. Either way, skip updating the call sites. - continue; - insertion.first->second = rewriteFuncAsCoroutine(func); - SmallVector users(symbolUserMap.getUsers(func).begin(), - symbolUserMap.getUsers(func).end()); - // If there are multiple calls from the same block they need to be traversed - // in reverse order so that symbolUserMap references are not invalidated - // when updating the users of the call op which is earlier in the block. - llvm::sort(users, [](Operation *a, Operation *b) { - Block *blockA = a->getBlock(); - Block *blockB = b->getBlock(); - // Impose arbitrary order on blocks so that there is a well-defined order. - return blockA > blockB || (blockA == blockB && !a->isBeforeInBlock(b)); - }); - // Rewrite the callsites to await on results of the newly created coroutine. - for (Operation *op : users) { - if (func::CallOp call = dyn_cast(*op)) { - func::FuncOp caller = call->getParentOfType(); - rewriteCallsiteForCoroutine(call, func); // Careful, erases the call op. - addToWorklist(caller); - } else { - op->emitError("Unexpected reference to func referenced by symbol"); - return failure(); - } - } - } - return success(); -} - //===----------------------------------------------------------------------===// void AsyncToAsyncRuntimePass::runOnOperation() { ModuleOp module = getOperation(); SymbolTable symbolTable(module); - // Outline all `async.execute` body regions into async functions (coroutines). - llvm::DenseMap outlinedFunctions; + // Functions with coroutine CFG setups, which are results of outlining + // `async.execute` body regions and converting async.func. 
+ llvm::DenseMap coros; module.walk([&](ExecuteOp execute) { - outlinedFunctions.insert(outlineExecuteOp(symbolTable, execute)); + coros.insert(outlineExecuteOp(symbolTable, execute)); }); LLVM_DEBUG({ - llvm::dbgs() << "Outlined " << outlinedFunctions.size() + llvm::dbgs() << "Outlined " << coros.size() << " functions built from async.execute operations\n"; }); // Returns true if operation is inside the coroutine. auto isInCoroutine = [&](Operation *op) -> bool { auto parentFunc = op->getParentOfType(); - return outlinedFunctions.find(parentFunc) != outlinedFunctions.end(); + return coros.find(parentFunc) != coros.end(); }; - if (eliminateBlockingAwaitOps && - failed(funcsToCoroutines(module, outlinedFunctions))) { - signalPassFailure(); - return; - } - // Lower async operations to async.runtime operations. MLIRContext *ctx = module->getContext(); RewritePatternSet asyncPatterns(ctx); @@ -783,18 +761,23 @@ void AsyncToAsyncRuntimePass::runOnOperation() { // Async lowering does not use type converter because it must preserve all // types for async.runtime operations. asyncPatterns.add(ctx); + + // Lower async.func to func.func with coroutine cfg. + asyncPatterns.add(ctx); + asyncPatterns.add(ctx, coros); + asyncPatterns.add(ctx, - outlinedFunctions); + AwaitAllOpLowering, YieldOpLowering>(ctx, coros); // Lower assertions to conditional branches into error blocks. - asyncPatterns.add(ctx, outlinedFunctions); + asyncPatterns.add(ctx, coros); // All high level async operations must be lowered to the runtime operations. ConversionTarget runtimeTarget(*ctx); - runtimeTarget.addLegalDialect(); + runtimeTarget.addLegalDialect(); runtimeTarget.addIllegalOp(); - runtimeTarget.addIllegalOp(); + runtimeTarget.addIllegalOp(); // Decide if structured control flow has to be lowered to branch-based CFG. 
runtimeTarget.addDynamicallyLegalDialect([&](Operation *op) { @@ -812,15 +795,9 @@ void AsyncToAsyncRuntimePass::runOnOperation() { runtimeTarget.addDynamicallyLegalOp( [&](cf::AssertOp op) -> bool { auto func = op->getParentOfType(); - return outlinedFunctions.find(func) == outlinedFunctions.end(); + return coros.find(func) == coros.end(); }); - if (eliminateBlockingAwaitOps) - runtimeTarget.addDynamicallyLegalOp( - [&](RuntimeAwaitOp op) -> bool { - return isAllowedToBlock(op->getParentOfType()); - }); - if (failed(applyPartialConversion(module, runtimeTarget, std::move(asyncPatterns)))) { signalPassFailure(); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp index 996e7b729c373..bff3b664ede55 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp @@ -119,9 +119,21 @@ static void updateReturnOps(func::FuncOp func, // Updates all CallOps in the scope of the given ModuleOp by allocating // temporary buffers for newly introduced out params. 
-static LogicalResult updateCalls(ModuleOp module) { +static LogicalResult +updateCalls(ModuleOp module, + const bufferization::BufferResultsToOutParamsOptions &options) { bool didFail = false; + SymbolTable symtab(module); module.walk([&](func::CallOp op) { + auto callee = symtab.lookup(op.getCallee()); + if (!callee) { + op.emitError() << "cannot find callee '" << op.getCallee() << "' in " + << "symbol table"; + didFail = true; + return; + } + if (!options.filterFn(&callee)) + return; SmallVector replaceWithNewCallResults; SmallVector replaceWithOutParams; for (OpResult result : op.getResults()) { @@ -169,9 +181,12 @@ static LogicalResult updateCalls(ModuleOp module) { return failure(didFail); } -LogicalResult -mlir::bufferization::promoteBufferResultsToOutParams(ModuleOp module) { +LogicalResult mlir::bufferization::promoteBufferResultsToOutParams( + ModuleOp module, + const bufferization::BufferResultsToOutParamsOptions &options) { for (auto func : module.getOps()) { + if (!options.filterFn(&func)) + continue; SmallVector appendedEntryArgs; if (failed(updateFuncOp(func, appendedEntryArgs))) return failure(); @@ -179,7 +194,7 @@ mlir::bufferization::promoteBufferResultsToOutParams(ModuleOp module) { continue; updateReturnOps(func, appendedEntryArgs); } - if (failed(updateCalls(module))) + if (failed(updateCalls(module, options))) return failure(); return success(); } @@ -188,14 +203,22 @@ namespace { struct BufferResultsToOutParamsPass : bufferization::impl::BufferResultsToOutParamsBase< BufferResultsToOutParamsPass> { + explicit BufferResultsToOutParamsPass( + const bufferization::BufferResultsToOutParamsOptions &options) + : options(options) {} + void runOnOperation() override { - if (failed(bufferization::promoteBufferResultsToOutParams(getOperation()))) + if (failed(bufferization::promoteBufferResultsToOutParams(getOperation(), + options))) return signalPassFailure(); } + +private: + bufferization::BufferResultsToOutParamsOptions options; }; } // namespace 
-std::unique_ptr -mlir::bufferization::createBufferResultsToOutParamsPass() { - return std::make_unique(); +std::unique_ptr mlir::bufferization::createBufferResultsToOutParamsPass( + const bufferization::BufferResultsToOutParamsOptions &options) { + return std::make_unique(options); } diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index 3eee6081e7eef..2c30425f72458 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -311,13 +311,10 @@ static void printAsyncDependencies(OpAsmPrinter &printer, Operation *op, static bool verifyReduceOpAndType(gpu::AllReduceOperation opName, Type resType) { - if ((opName == gpu::AllReduceOperation::AND || - opName == gpu::AllReduceOperation::OR || - opName == gpu::AllReduceOperation::XOR) && - !resType.isa()) - return false; - - return true; + return !((opName == gpu::AllReduceOperation::AND || + opName == gpu::AllReduceOperation::OR || + opName == gpu::AllReduceOperation::XOR) && + !resType.isa()); } LogicalResult gpu::AllReduceOp::verifyRegions() { diff --git a/mlir/lib/Dialect/Index/IR/IndexOps.cpp b/mlir/lib/Dialect/Index/IR/IndexOps.cpp index fcbb076f2e16f..2178a758b3dc2 100644 --- a/mlir/lib/Dialect/Index/IR/IndexOps.cpp +++ b/mlir/lib/Dialect/Index/IR/IndexOps.cpp @@ -62,17 +62,19 @@ Operation *IndexDialect::materializeConstant(OpBuilder &b, Attribute value, /// the integer result, which in turn must satisfy the above property. 
static OpFoldResult foldBinaryOpUnchecked( ArrayRef operands, - function_ref calculate) { + function_ref(const APInt &, const APInt &)> calculate) { assert(operands.size() == 2 && "binary operation expected 2 operands"); auto lhs = dyn_cast_if_present(operands[0]); auto rhs = dyn_cast_if_present(operands[1]); if (!lhs || !rhs) return {}; - APInt result = calculate(lhs.getValue(), rhs.getValue()); - assert(result.trunc(32) == + Optional result = calculate(lhs.getValue(), rhs.getValue()); + if (!result) + return {}; + assert(result->trunc(32) == calculate(lhs.getValue().trunc(32), rhs.getValue().trunc(32))); - return IntegerAttr::get(IndexType::get(lhs.getContext()), std::move(result)); + return IntegerAttr::get(IndexType::get(lhs.getContext()), *result); } /// Fold an index operation only if the truncated 64-bit result matches the @@ -105,8 +107,7 @@ static OpFoldResult foldBinaryOpChecked( if (result64->trunc(32) != *result32) return {}; // The operation can be folded for these particular operands. - return IntegerAttr::get(IndexType::get(lhs.getContext()), - std::move(*result64)); + return IntegerAttr::get(IndexType::get(lhs.getContext()), *result64); } //===----------------------------------------------------------------------===// @@ -284,6 +285,50 @@ OpFoldResult MaxUOp::fold(ArrayRef operands) { }); } +//===----------------------------------------------------------------------===// +// ShlOp +//===----------------------------------------------------------------------===// + +OpFoldResult ShlOp::fold(ArrayRef operands) { + return foldBinaryOpUnchecked( + operands, [](const APInt &lhs, const APInt &rhs) -> Optional { + // We cannot fold if the RHS is greater than or equal to 32 because + // this would be UB in 32-bit systems but not on 64-bit systems. RHS is + // already treated as unsigned. 
+ if (rhs.uge(32)) + return {}; + return lhs << rhs; + }); +} + +//===----------------------------------------------------------------------===// +// ShrSOp +//===----------------------------------------------------------------------===// + +OpFoldResult ShrSOp::fold(ArrayRef operands) { + return foldBinaryOpChecked( + operands, [](const APInt &lhs, const APInt &rhs) -> Optional { + // Don't fold if RHS is greater than or equal to 32. + if (rhs.uge(32)) + return {}; + return lhs.ashr(rhs); + }); +} + +//===----------------------------------------------------------------------===// +// ShrUOp +//===----------------------------------------------------------------------===// + +OpFoldResult ShrUOp::fold(ArrayRef operands) { + return foldBinaryOpChecked( + operands, [](const APInt &lhs, const APInt &rhs) -> Optional { + // Don't fold if RHS is greater than or equal to 32. + if (rhs.uge(32)) + return {}; + return lhs.lshr(rhs); + }); +} + //===----------------------------------------------------------------------===// // CastSOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt index b3f13edb07eee..9515b2c9990af 100644 --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -4,7 +4,6 @@ add_mlir_dialect_library(MLIRLLVMDialect IR/FunctionCallUtils.cpp IR/LLVMAttrs.cpp IR/LLVMDialect.cpp - IR/LLVMIntrinsicOps.cpp IR/LLVMTypes.cpp IR/LLVMTypeSyntax.cpp diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp index 7d0c2297736b2..74cce2a9ae40a 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMAttrs.cpp @@ -44,7 +44,8 @@ bool DINodeAttr::classof(Attribute attr) { return llvm::isa(attr); + DISubprogramAttr, DISubrangeAttr, DISubroutineTypeAttr>( + attr); } //===----------------------------------------------------------------------===// 
@@ -61,170 +62,8 @@ bool DIScopeAttr::classof(Attribute attr) { //===----------------------------------------------------------------------===// bool DITypeAttr::classof(Attribute attr) { - return llvm::isa(attr); -} - -//===----------------------------------------------------------------------===// -// DICompileUnitAttr -//===----------------------------------------------------------------------===// - -void DICompileUnitAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getFile()); - walkAttrsFn(getProducer()); -} - -Attribute -DICompileUnitAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), getSourceLanguage(), replAttrs[0].cast(), - replAttrs[1].cast(), getIsOptimized(), - getEmissionKind()); -} - -//===----------------------------------------------------------------------===// -// DICompositeTypeAttr -//===----------------------------------------------------------------------===// - -void DICompositeTypeAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getName()); - walkAttrsFn(getFile()); - walkAttrsFn(getScope()); - for (DINodeAttr element : getElements()) - walkAttrsFn(element); -} - -Attribute DICompositeTypeAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - ArrayRef elements = replAttrs.drop_front(3); - return get( - getContext(), getTag(), replAttrs[0].cast(), - cast_or_null(replAttrs[1]), getLine(), - cast_or_null(replAttrs[2]), getSizeInBits(), - getAlignInBits(), - ArrayRef(static_cast(elements.data()), - elements.size())); -} - -//===----------------------------------------------------------------------===// -// DIDerivedTypeAttr -//===----------------------------------------------------------------------===// - -void DIDerivedTypeAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - 
walkAttrsFn(getName()); - walkAttrsFn(getBaseType()); -} - -Attribute -DIDerivedTypeAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), getTag(), replAttrs[0].cast(), - replAttrs[1].cast(), getSizeInBits(), getAlignInBits(), - getOffsetInBits()); -} - -//===----------------------------------------------------------------------===// -// DILexicalBlockAttr -//===----------------------------------------------------------------------===// - -void DILexicalBlockAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getScope()); - walkAttrsFn(getFile()); -} - -Attribute DILexicalBlockAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replAttrs[0].cast(), replAttrs[1].cast(), - getLine(), getColumn()); -} - -//===----------------------------------------------------------------------===// -// DILexicalBlockFileAttr -//===----------------------------------------------------------------------===// - -void DILexicalBlockFileAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getScope()); - walkAttrsFn(getFile()); -} - -Attribute DILexicalBlockFileAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replAttrs[0].cast(), replAttrs[1].cast(), - getDescriminator()); -} - -//===----------------------------------------------------------------------===// -// DILocalVariableAttr -//===----------------------------------------------------------------------===// - -void DILocalVariableAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getScope()); - walkAttrsFn(getName()); - walkAttrsFn(getFile()); - walkAttrsFn(getType()); -} - -Attribute DILocalVariableAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(getContext(), 
replAttrs[0].cast(), - replAttrs[1].cast(), replAttrs[2].cast(), - getLine(), getArg(), getAlignInBits(), - replAttrs[3].cast()); -} - -//===----------------------------------------------------------------------===// -// DISubprogramAttr -//===----------------------------------------------------------------------===// - -void DISubprogramAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getCompileUnit()); - walkAttrsFn(getScope()); - walkAttrsFn(getName()); - walkAttrsFn(getLinkageName()); - walkAttrsFn(getFile()); - walkAttrsFn(getType()); -} - -Attribute -DISubprogramAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), replAttrs[0].cast(), - replAttrs[1].cast(), replAttrs[2].cast(), - replAttrs[3].cast(), replAttrs[4].cast(), - getLine(), getScopeLine(), getSubprogramFlags(), - replAttrs[5].cast()); -} - -//===----------------------------------------------------------------------===// -// DISubroutineTypeAttr -//===----------------------------------------------------------------------===// - -void DISubroutineTypeAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (DITypeAttr type : getTypes()) - walkAttrsFn(type); -} - -Attribute DISubroutineTypeAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get( - getContext(), getCallingConvention(), - ArrayRef(static_cast(replAttrs.data()), - replAttrs.size())); + return llvm::isa(attr); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 375ec6ff0e78a..c2b09619a5f52 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -2612,6 +2612,9 @@ void LLVMDialect::initialize() { #define GET_OP_CLASSES #include 
"mlir/Dialect/LLVMIR/LLVMOps.cpp.inc" +#define GET_OP_CLASSES +#include "mlir/Dialect/LLVMIR/LLVMIntrinsicOps.cpp.inc" + LogicalResult LLVMDialect::verifyDataLayoutString( StringRef descr, llvm::function_ref reportError) { llvm::Expected maybeDataLayout = diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMIntrinsicOps.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMIntrinsicOps.cpp deleted file mode 100644 index a5d85a7cf4ccb..0000000000000 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMIntrinsicOps.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" - -using namespace mlir; -using namespace mlir::LLVM; - -#define GET_OP_CLASSES -#include "mlir/Dialect/LLVMIR/LLVMIntrinsicOps.cpp.inc" diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index 99fa193185a61..133fc6036931e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -183,20 +183,6 @@ LLVMArrayType::getPreferredAlignment(const DataLayout &dataLayout, return dataLayout.getTypePreferredAlignment(getElementType()); } -//===----------------------------------------------------------------------===// -// SubElementTypeInterface - -void LLVMArrayType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type LLVMArrayType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes.front(), getNumElements()); -} - //===----------------------------------------------------------------------===// // Function type. 
//===----------------------------------------------------------------------===// @@ -247,22 +233,6 @@ LLVMFunctionType::verify(function_ref emitError, return success(); } -//===----------------------------------------------------------------------===// -// SubElementTypeInterface - -void LLVMFunctionType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getReturnType()); - for (Type type : getParams()) - walkTypesFn(type); -} - -Type LLVMFunctionType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes.front(), replTypes.drop_front(), isVarArg()); -} - //===----------------------------------------------------------------------===// // LLVMPointerType //===----------------------------------------------------------------------===// @@ -439,20 +409,6 @@ LogicalResult LLVMPointerType::verifyEntries(DataLayoutEntryListRef entries, return success(); } -//===----------------------------------------------------------------------===// -// SubElementTypeInterface - -void LLVMPointerType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type LLVMPointerType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(getContext(), replTypes.front(), getAddressSpace()); -} - //===----------------------------------------------------------------------===// // Struct type. 
//===----------------------------------------------------------------------===// @@ -749,17 +705,6 @@ LLVMFixedVectorType::verify(function_ref emitError, emitError, elementType, numElements); } -void LLVMFixedVectorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type LLVMFixedVectorType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes[0], getNumElements()); -} - //===----------------------------------------------------------------------===// // LLVMScalableVectorType. //===----------------------------------------------------------------------===// @@ -792,17 +737,6 @@ LLVMScalableVectorType::verify(function_ref emitError, emitError, elementType, numElements); } -void LLVMScalableVectorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type LLVMScalableVectorType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes[0], getMinNumElements()); -} - //===----------------------------------------------------------------------===// // Utility functions. 
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index ae2514c3eecdb..8ce1ad070f46a 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -662,7 +662,7 @@ void FillOp::getCanonicalizationPatterns(RewritePatternSet &results, //===----------------------------------------------------------------------===// static void buildGenericRegion( - OpBuilder &builder, OperationState &result, ValueRange inputs, + OpBuilder &builder, Location loc, Region ®ion, ValueRange inputs, ValueRange outputs, function_ref bodyBuild) { SmallVector blockArgTypes; @@ -675,10 +675,9 @@ static void buildGenericRegion( } OpBuilder::InsertionGuard guard(builder); - auto ®ion = *result.regions.front(); Block *bodyBlock = builder.createBlock(®ion, region.end(), blockArgTypes, blockArgLocs); - bodyBuild(builder, result.location, bodyBlock->getArguments()); + bodyBuild(builder, loc, bodyBlock->getArguments()); } void GenericOp::getAsmBlockArgumentNames(Region ®ion, @@ -699,7 +698,8 @@ void GenericOp::build( iteratorTypes, doc, libraryCall); result.addAttributes(attributes); if (bodyBuild) - buildGenericRegion(builder, result, inputs, outputs, bodyBuild); + buildGenericRegion(builder, result.location, *result.regions.front(), + inputs, outputs, bodyBuild); } void GenericOp::build( @@ -836,8 +836,8 @@ ParseResult GenericOp::parse(OpAsmParser &parser, OperationState &result) { static void getGenericEffectsImpl( SmallVectorImpl> &effects, - ValueRange results, OpOperandVector inputOperands, - OpOperandVector outputOperands) { + ValueRange results, const OpOperandVector &inputOperands, + const OpOperandVector &outputOperands) { for (auto *operand : inputOperands) { if (!operand->get().getType().isa()) continue; @@ -935,10 +935,9 @@ struct DeduplicateAndRemoveDeadOperandsAndResults // Create the new op with the body being empty. 
Location loc = genericOp.getLoc(); SmallVector newResultTypes; - if (genericOp.hasTensorSemantics()) { - newResultTypes = llvm::to_vector(llvm::map_range( - newOutputOperands, [](Value v) { return v.getType(); })); - } + for (Value v : newOutputOperands) + if (v.getType().isa()) + newResultTypes.push_back(v.getType()); auto newOp = rewriter.create( loc, newResultTypes, newInputOperands, newOutputOperands, rewriter.getAffineMapArrayAttr(newIndexingMaps), @@ -1347,7 +1346,8 @@ void MapOp::build( result.addTypes(initType); if (bodyBuild) - buildGenericRegion(builder, result, inputs, /*outputs=*/{}, bodyBuild); + buildGenericRegion(builder, result.location, *result.regions.front(), + inputs, /*outputs=*/{}, bodyBuild); } ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) { @@ -1472,7 +1472,8 @@ void ReduceOp::build( } if (bodyBuild) - buildGenericRegion(builder, result, inputs, inits, bodyBuild); + buildGenericRegion(builder, result.location, *result.regions.front(), + inputs, inits, bodyBuild); } SmallVector ReduceOp::getIteratorTypesArray() { @@ -1649,13 +1650,13 @@ LogicalResult ReduceOp::verify() { // TransposeOp //===----------------------------------------------------------------------===// -std::function)> -TransposeOp::getRegionBuilder() { - return [](mlir::ImplicitLocOpBuilder &b, mlir::Block &block, - mlir::ArrayRef) { - b.create(block.getArguments().front()); - }; +static void buildIdentityRegion(OpBuilder &builder, Location loc, + Region ®ion, ValueRange inputs, + ValueRange outputs) { + buildGenericRegion(builder, loc, region, inputs, outputs, + [](OpBuilder &b, Location loc, ValueRange args) { + b.create(loc, args[0]); + }); } void TransposeOp::build(::mlir::OpBuilder &builder, @@ -1672,11 +1673,8 @@ void TransposeOp::build(::mlir::OpBuilder &builder, if (initType.isa()) result.addTypes(initType); - (void)result.addRegion(); - buildGenericRegion(builder, result, input, init, - [&](OpBuilder &b, Location loc, ValueRange args) { - 
b.create(loc, args[0]); - }); + buildIdentityRegion(builder, result.location, *result.addRegion(), input, + init); } void TransposeOp::build(::mlir::OpBuilder &builder, @@ -1694,13 +1692,10 @@ ParseResult TransposeOp::parse(OpAsmParser &parser, OperationState &result) { }))) return failure(); - (void)result.addRegion(); OpBuilder builder(parser.getContext()); - buildGenericRegion(builder, result, /*inputs=*/result.operands, - /*outputs=*/{}, - [&](OpBuilder &b, Location loc, ValueRange args) { - b.create(loc, args[0]); - }); + buildIdentityRegion(builder, result.location, *result.addRegion(), + /*inputs=*/result.operands, + /*outputs=*/{}); return success(); } @@ -1779,6 +1774,144 @@ void TransposeOp::getEffects( getDpsInputOperands(), getDpsInitOperands()); } +//===----------------------------------------------------------------------===// +// BroadcastOp +//===----------------------------------------------------------------------===// + +void BroadcastOp::build(::mlir::OpBuilder &builder, + ::mlir::OperationState &result, Value input, Value init, + DenseI64ArrayAttr dimensions, + ArrayRef attributes) { + result.addOperands(input); + result.addOperands(init); + result.addAttribute(getDimensionsAttrName(result.name), dimensions); + result.addAttributes(attributes); + + // Add output types for `RankedTensorType` output arguments. 
+ Type initType = init.getType(); + if (initType.isa()) + result.addTypes(initType); + + buildIdentityRegion(builder, result.location, *result.addRegion(), input, + init); +} + +void BroadcastOp::build(::mlir::OpBuilder &builder, + ::mlir::OperationState &result, Value input, Value init, + ArrayRef dimensions, + ArrayRef attributes) { + build(builder, result, input, init, builder.getDenseI64ArrayAttr(dimensions), + attributes); +} + +ParseResult BroadcastOp::parse(OpAsmParser &parser, OperationState &result) { + if (failed(parseDstStyleOp( + parser, result, [&](OpAsmParser &parser, NamedAttrList &attributes) { + return parseDenseI64ArrayAttr(parser, attributes, "dimensions"); + }))) + return failure(); + + OpBuilder builder(parser.getContext()); + buildIdentityRegion(builder, result.location, *result.addRegion(), + /*inputs=*/result.operands, + /*outputs=*/{}); + return success(); +} + +void BroadcastOp::getAsmResultNames( + function_ref setNameFn) { + if (!getResults().empty()) + setNameFn(getResults().front(), "broadcasted"); +} + +void BroadcastOp::print(OpAsmPrinter &p) { + p.increaseIndent(); + printCommonStructuredOpPartsWithNewLine( + p, SmallVector(getDpsInputOperands()), + SmallVector(getDpsInitOperands())); + p.printNewline(); + + printDenseI64ArrayAttr(p, getDimensionsAttrName(), getDimensions()); + p.printOptionalAttrDict((*this)->getAttrs(), {getDimensionsAttrName()}); + p.decreaseIndent(); +} + +LogicalResult BroadcastOp::verify() { + ArrayRef dimensionsRef = getDimensions(); + + if (!llvm::is_sorted(dimensionsRef)) + return emitOpError() << "dimensions should be in sorted order, implicit " + "transpose is not supported"; + + auto inputType = getInput().getType(); + auto initType = getInit().getType(); + + int64_t inputRank = inputType.getRank(); + int64_t initRank = initType.getRank(); + + auto inputShape = inputType.getShape(); + auto initShape = initType.getShape(); + + if ((size_t)inputRank != dimensionsRef.size()) + return emitOpError() + << 
"input rank does match the number of dimensions. expected: " + << inputRank << ", got: " << dimensionsRef.size(); + + // Mapping from init dims to input dims. + const int64_t kUnmappedDim = -1; + SmallVector reverseDimMap(initRank, kUnmappedDim); + + for (const auto &[idx, dim] : llvm::enumerate(dimensionsRef)) { + if (dim < 0 || dim >= initRank) + return emitOpError() << "dimension " << idx + << " is out of range. expected range: [0, " + << initRank - 1 << "], got: " << dim; + + reverseDimMap[dim] = idx; + } + + for (const auto &[idx, inputDimIdx] : llvm::enumerate(reverseDimMap)) { + if (inputDimIdx == kUnmappedDim) { + // This dimensions is being added. Should be statically known. + if (ShapedType::isDynamic(initShape[idx])) + return emitOpError() + << "init dim " << idx + << " can't be dynamic, because it's not matched to input"; + } else { + // This dimensions is mapped from the input. Init and input dims should + // match. + if (inputShape[inputDimIdx] != initShape[idx]) + return emitOpError() + << "input dim " << inputDimIdx << " should match init dim " + << idx << ". 
input: " << inputShape[inputDimIdx] + << ", init: " << initShape[idx]; + } + } + + return success(); +} + +SmallVector BroadcastOp::getIteratorTypesArray() { + int64_t rank = getInit().getType().getRank(); + return SmallVector(rank, getParallelIteratorTypeName()); +} + +ArrayAttr BroadcastOp::getIndexingMaps() { + Builder builder(getContext()); + int64_t rank = getInit().getType().getRank(); + return builder.getAffineMapArrayAttr( + {builder.getMultiDimIdentityMap(rank).getSubMap( + llvm::to_vector_of(getDimensions())), + builder.getMultiDimIdentityMap(rank)}); +} + +void BroadcastOp::getEffects( + SmallVectorImpl> + &effects) { + getGenericEffectsImpl(effects, getOperation()->getResults(), + getDpsInputOperands(), getDpsInitOperands()); +} + //===----------------------------------------------------------------------===// // YieldOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 513882ec91260..6b8ca9125c82d 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -254,6 +254,14 @@ LogicalResult transform::FuseOp::verify() { // FuseIntoContainingOp //===----------------------------------------------------------------------===// +void transform::FuseIntoContainingOp::build(OpBuilder &builder, + OperationState &result, + Value producerOp, + Value containingOp) { + result.addOperands({producerOp, containingOp}); + result.addTypes(pdl::OperationType::get(builder.getContext())); +} + /// Find the first "extract" user of `producerOp` and tile it right before its /// use. The tiled op is fused under the `containingOp`. /// Return this fused op on success or nullptr if anything fails. 
@@ -628,6 +636,14 @@ LogicalResult transform::InterchangeOp::verify() { // MatchOp //===---------------------------------------------------------------------===// +void transform::MatchOp::build(OpBuilder &builder, OperationState &result, + Value target, ArrayRef opNames) { + result.addOperands(target); + result.addAttribute(MatchOp::getOpsAttrName(result.name), + builder.getStrArrayAttr(opNames)); + result.addTypes(pdl::OperationType::get(builder.getContext())); +} + DiagnosedSilenceableFailure transform::MatchOp::apply(transform::TransformResults &results, transform::TransformState &state) { @@ -915,7 +931,7 @@ transform::ScalarizeOp::applyToOne(linalg::LinalgOp target, if (failed(maybeTilingResult)) return DiagnosedSilenceableFailure(reportUnknownTransformError(target)); - results.push_back(maybeTilingResult->tiledOp); + results.append(maybeTilingResult->tiledOps); return DiagnosedSilenceableFailure(success()); } @@ -1069,6 +1085,34 @@ LogicalResult SplitOp::verify() { // SplitReductionOp //===----------------------------------------------------------------------===// +void transform::SplitReductionOp::build( + OpBuilder &builder, OperationState &result, Value target, + int64_t splitFactor, int64_t insertSplitDimension, bool innerParallel, + bool useScalingAlgorithm, bool useAlloc) { + MLIRContext *ctx = builder.getContext(); + result.addOperands(target); + result.addAttribute(SplitReductionOp::getSplitFactorAttrName(result.name), + builder.getI64IntegerAttr(splitFactor)); + result.addAttribute( + SplitReductionOp::getInsertSplitDimensionAttrName(result.name), + builder.getI64IntegerAttr(insertSplitDimension)); + if (innerParallel) { + result.addAttribute(SplitReductionOp::getInnerParallelAttrName(result.name), + builder.getUnitAttr()); + } + if (useScalingAlgorithm) { + result.addAttribute( + SplitReductionOp::getUseScalingAlgorithmAttrName(result.name), + builder.getUnitAttr()); + } + if (useAlloc) { + 
result.addAttribute(SplitReductionOp::getUseAllocAttrName(result.name), + builder.getUnitAttr()); + } + auto resultType = pdl::OperationType::get(ctx); + result.addTypes({resultType, resultType, resultType, resultType}); +} + DiagnosedSilenceableFailure transform::SplitReductionOp::applyToOne(linalg::LinalgOp target, SmallVectorImpl &results, @@ -1094,6 +1138,33 @@ transform::SplitReductionOp::applyToOne(linalg::LinalgOp target, return DiagnosedSilenceableFailure(success()); } +//===----------------------------------------------------------------------===// +// SplitReductionOp +//===----------------------------------------------------------------------===// + +DiagnosedSilenceableFailure transform::TileReductionUsingScfOp::applyToOne( + linalg::LinalgOp target, SmallVectorImpl &results, + transform::TransformState &state) { + SimpleRewriter rewriter(getContext()); + rewriter.setInsertionPoint(target); + SmallVector tileSizes = extractFromI64ArrayAttr(getTileSizes()); + SmallVector sizes; + for (int64_t size : tileSizes) { + sizes.push_back(rewriter.getIndexAttr(size)); + } + + FailureOr result = scf::tileReductionUsingScf( + rewriter, cast(target.getOperation()), + sizes); + + if (failed(result)) + return DiagnosedSilenceableFailure(reportUnknownTransformError(target)); + results.push_back(result->initialOp); + results.push_back(result->parallelTiledOp); + results.push_back(result->mergeOp); + return DiagnosedSilenceableFailure(success()); +} + //===----------------------------------------------------------------------===// // TileOp //===----------------------------------------------------------------------===// @@ -1180,7 +1251,7 @@ transform::TileOp::apply(TransformResults &transformResults, rewriter.replaceOp(linalgOp, maybeTilingResult->loops.front()->getResults()); - tiled.push_back(maybeTilingResult->tiledOp); + tiled.append(maybeTilingResult->tiledOps); for (const auto &en2 : llvm::enumerate(maybeTilingResult->loops)) 
loops[en2.index()].push_back(en2.value()); } @@ -1250,13 +1321,75 @@ void transform::TileOp::getEffects( // TileToForeachThreadOp //===----------------------------------------------------------------------===// +void transform::TileToForeachThreadOp::build( + OpBuilder &builder, OperationState &result, Value target, + ArrayRef staticTileSizes, transform::TileSizesSpec, + ArrayRef threadDimMapping) { + return build(builder, result, target, + getAsOpFoldResult(builder.getI64ArrayAttr(staticTileSizes)), + TileSizesSpec(), threadDimMapping); +} + +void transform::TileToForeachThreadOp::build( + OpBuilder &builder, OperationState &result, Value target, + ArrayRef mixedTileSizes, transform::TileSizesSpec, + ArrayRef threadDimMapping) { + SmallVector staticTileSizes; + SmallVector dynamicTileSizes; + dispatchIndexOpFoldResults(mixedTileSizes, dynamicTileSizes, staticTileSizes, + ShapedType::kDynamicSize); + // Call the default builder which sets up the proper operands segment sizes + // attributes for multiple variadic operands. In the absence of this, horrible + // bugs ensue. 
+ MLIRContext *ctx = builder.getContext(); + auto operationType = pdl::OperationType::get(ctx); + auto staticTileSizesAttr = builder.getI64ArrayAttr(staticTileSizes); + ArrayAttr threadDimMappingAttr; + if (!threadDimMapping.empty()) + threadDimMappingAttr = builder.getI64ArrayAttr(threadDimMapping); + build(builder, result, TypeRange{operationType, operationType}, target, + /*numThreads=*/ValueRange{}, dynamicTileSizes, + /*staticNumThreads=*/ArrayAttr(), staticTileSizesAttr, + threadDimMappingAttr); +} + +void transform::TileToForeachThreadOp::build( + OpBuilder &builder, OperationState &result, Value target, + ArrayRef staticNumThreads, transform::NumThreadsSpec, + ArrayRef threadDimMapping) { + return build(builder, result, target, + getAsOpFoldResult(builder.getI64ArrayAttr(staticNumThreads)), + NumThreadsSpec(), threadDimMapping); +} + +void transform::TileToForeachThreadOp::build( + OpBuilder &builder, OperationState &result, Value target, + ArrayRef mixedNumThreads, transform::NumThreadsSpec, + ArrayRef threadDimMapping) { + SmallVector staticNumThreads; + SmallVector dynamicNumThreads; + dispatchIndexOpFoldResults(mixedNumThreads, dynamicNumThreads, + staticNumThreads, ShapedType::kDynamicSize); + // Call the default builder which sets up the proper operands segment sizes + // attributes for multiple variadic operands. In the absence of this, horrible + // bugs ensue. 
+ MLIRContext *ctx = builder.getContext(); + auto operationType = pdl::OperationType::get(ctx); + auto staticNumThreadsAttr = builder.getI64ArrayAttr(staticNumThreads); + ArrayAttr threadDimMappingAttr; + if (!threadDimMapping.empty()) + threadDimMappingAttr = builder.getI64ArrayAttr(threadDimMapping); + build(builder, result, TypeRange{operationType, operationType}, target, + dynamicNumThreads, /*tileSizes=*/ValueRange{}, staticNumThreadsAttr, + /*staticTileSizes=*/ArrayAttr(), threadDimMappingAttr); +} + DiagnosedSilenceableFailure transform::tileToForeachThreadOpImpl( RewriterBase &rewriter, transform::TransformState &state, TransformOpInterface transformOp, ArrayRef targets, ArrayRef mixedNumThreads, ArrayRef mixedTileSizes, Optional threadDimMapping, SmallVector &tileOps, SmallVector &tiledOps) { - if (targets.empty()) return DiagnosedSilenceableFailure(success()); @@ -1476,7 +1609,7 @@ transform::TileToScfForOp::apply(TransformResults &transformResults, rewriter.replaceOp(tilingInterfaceOp, tilingResult->replacements); - tiled.push_back(tilingResult->tiledOp); + tiled.append(tilingResult->tiledOps); for (const auto &en2 : llvm::enumerate(tilingResult->loops)) loops[en2.index()].push_back(en2.value()); } @@ -1546,6 +1679,16 @@ void transform::TileToScfForOp::getEffects( // VectorizeOp //===----------------------------------------------------------------------===// +void transform::VectorizeOp::build(OpBuilder &builder, OperationState &result, + Value target, bool vectorizePadding) { + result.addOperands(target); + if (vectorizePadding) { + result.addAttribute(VectorizeOp::getVectorizePaddingAttrName(result.name), + builder.getUnitAttr()); + } + result.addTypes(pdl::OperationType::get(builder.getContext())); +} + namespace { /// This is an helper only to call vectorize via a pattern inside of /// VectorizeOp::applyToOne. 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp b/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp index a74538767d76a..c6a9989d971fc 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp @@ -73,8 +73,8 @@ mlir::linalg::interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, m = m.compose(permutationMap); newIndexingMaps.push_back(m); } - genericOp->setAttr(getIndexingMapsAttrName(), - rewriter.getAffineMapArrayAttr(newIndexingMaps)); + genericOp.setIndexingMapsAttr( + rewriter.getAffineMapArrayAttr(newIndexingMaps)); // 3. Compute the interchanged iterator types. ArrayRef itTypes = genericOp.getIteratorTypes().getValue(); @@ -83,8 +83,7 @@ mlir::linalg::interchangeGenericOp(RewriterBase &rewriter, GenericOp genericOp, SmallVector permutation(interchangeVector.begin(), interchangeVector.end()); applyPermutationToVector(itTypesVector, permutation); - genericOp->setAttr(getIteratorTypesAttrName(), - ArrayAttr::get(context, itTypesVector)); + genericOp.setIteratorTypesAttr(rewriter.getArrayAttr(itTypesVector)); // 4. Transform the index operations by applying the permutation map. if (genericOp.hasIndexSemantics()) { diff --git a/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp b/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp index 32d05c5acbe6c..0608c361e774b 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/SplitReduction.cpp @@ -26,38 +26,6 @@ using namespace mlir; using namespace mlir::linalg; -/// Return the identity numeric value associated to the give op. -static Attribute getNeutralElement(Operation *op) { - // Builder only used as helper for attribute creation. 
- OpBuilder b(op->getContext()); - Type resultType = op->getResult(0).getType(); - if (auto floatType = resultType.dyn_cast()) { - const llvm::fltSemantics &semantic = floatType.getFloatSemantics(); - if (isa(op)) - return b.getFloatAttr(resultType, llvm::APFloat::getZero(semantic)); - if (isa(op)) - return b.getFloatAttr(resultType, llvm::APFloat(semantic, 1)); - if (isa(op)) - return b.getFloatAttr(resultType, - llvm::APFloat::getLargest(semantic, true)); - if (isa(op)) - return b.getFloatAttr(resultType, - llvm::APFloat::getLargest(semantic, true)); - return Attribute(); - } - if (isa(op)) - return b.getIntegerAttr(resultType, 0); - if (isa(op)) - return b.getIntegerAttr(resultType, -1); - if (isa(op)) - return b.getIntegerAttr(resultType, std::numeric_limits::min()); - if (isa(op)) - return b.getIntegerAttr(resultType, std::numeric_limits::max()); - if (isa(op)) - return b.getIntegerAttr(resultType, 1); - return Attribute(); -} - FailureOr mlir::linalg::splitReduction( PatternRewriter &b, LinalgOp op, const ControlSplitReductionFn &controlSplitReductionFn, bool useAlloc) { @@ -88,8 +56,8 @@ FailureOr mlir::linalg::splitReduction( return b.notifyMatchFailure(op, "Cannot match the reduction pattern"); Operation *reductionOp = combinerOps[0]; - Attribute identity = getNeutralElement(reductionOp); - if (!identity) + Optional identity = getNeutralElement(reductionOp); + if (!identity.has_value()) return b.notifyMatchFailure(op, "Unknown identity value for the reduction"); Location loc = op->getLoc(); @@ -187,7 +155,7 @@ FailureOr mlir::linalg::splitReduction( emptyOrAllocTensor = b.create( loc, newOutputShape, op.getRegionOutputArgs()[0].getType()); } - Value constantOp = b.create(loc, identity); + Value constantOp = b.create(loc, *identity); Value identityTensor = b.create(op->getLoc(), constantOp, emptyOrAllocTensor) .getResult(0); @@ -309,10 +277,13 @@ FailureOr mlir::linalg::splitReductionByScaling( if (!matchReduction(op.getRegionOutputArgs(), 0, combinerOps)) 
return b.notifyMatchFailure(op, "cannot match a reduction pattern"); - SmallVector neutralElements = llvm::to_vector<4>( - llvm::map_range(combinerOps, [&](Operation *reductionOp) { - return getNeutralElement(reductionOp); - })); + SmallVector neutralElements; + for (Operation *reductionOp : combinerOps) { + Optional neutralElement = getNeutralElement(reductionOp); + if (!neutralElement.has_value()) + return b.notifyMatchFailure(op, "cannot find neutral element."); + neutralElements.push_back(*neutralElement); + } if (!llvm::all_of(neutralElements, [](Attribute attr) { return attr; })) return b.notifyMatchFailure(op, "unknown reduction neutral"); diff --git a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp index c843f0f400793..d1fcc01ca853d 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp @@ -8,6 +8,7 @@ #include "mlir/Dialect/Linalg/Transforms/TilingInterfaceImpl.h" +#include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" @@ -240,11 +241,170 @@ struct LinalgOpTilingInterface } }; +//===----------------------------------------------------------------------===// +// External Model for implementing `PartialReductionInterface` for `LinalgOp`s. +//===----------------------------------------------------------------------===// + +/// External model implementation of PartialReductionInterface for LinalgOps. 
+template +struct LinalgOpPartialReductionInterface + : public PartialReductionOpInterface::ExternalModel< + LinalgOpPartialReductionInterface, LinalgOpTy> { + FailureOr generateInitialTensorForPartialReduction( + Operation *op, OpBuilder &b, Location loc, ArrayRef sizes, + ArrayRef reductionDims) const { + auto linalgOp = cast(op); + OpBuilder::InsertionGuard guard(b); + assert(reductionDims.size() == 1 && + "only support single reduction right now."); + if (linalgOp.hasBufferSemantics()) + return op->emitOpError("expected operation to have tensor semantics"); + // Insert the new parallel dimension based on the index of the reduction + // loop. This could be controlled by user for more flexibility. + int64_t insertSplitDimension = reductionDims[0]; + + SmallVector combinerOps; + if (!matchReduction(linalgOp.getRegionOutputArgs(), 0, combinerOps) || + combinerOps.size() != 1) + return op->emitOpError("Failed to anaysis the reduction operation."); + + Operation *reductionOp = combinerOps[0]; + Optional identity = getNeutralElement(reductionOp); + if (!identity.has_value()) + return op->emitOpError( + "Failed to get an identity value for the reduction operation."); + + // Calculate the new shape, we insert the new dimension based on the index + // of the reduction dimension. + SmallVector newOutputShape; + ArrayRef oldShape = + linalgOp.getShape(linalgOp.getDpsInitOperand(0)); + SmallVector dynamicDims; + for (int64_t idx : llvm::seq(0, oldShape.size() + 1)) { + if (idx == insertSplitDimension) { + dispatchIndexOpFoldResults(sizes[idx], dynamicDims, newOutputShape, + ShapedType::kDynamicStrideOrOffset); + continue; + } + int64_t oldIdx = idx < insertSplitDimension ? 
idx : idx - 1; + int64_t dim = oldShape[oldIdx]; + newOutputShape.push_back(dim); + if (ShapedType::isDynamic(dim)) + dynamicDims.push_back(b.createOrFold( + loc, linalgOp.getDpsInitOperand(0)->get(), oldIdx)); + } + Value emptyTensor = b.create( + loc, newOutputShape, linalgOp.getRegionOutputArgs()[0].getType(), + dynamicDims); + Value constantOp = b.create(loc, *identity); + auto identityTensor = + b.create(loc, constantOp, emptyTensor); + return identityTensor.getOperation(); + } + + Operation *tileToPartialReduction(Operation *op, OpBuilder &b, Location loc, + ValueRange init, + ArrayRef offsets, + ArrayRef sizes, + ArrayRef reductionDims) const { + OpBuilder::InsertionGuard guard(b); + auto linalgOp = cast(op); + assert(reductionDims.size() == 1 && + "only support single reduction right now."); + int64_t insertSplitDimension = reductionDims[0]; + + AffineMap oldOutputMap = + linalgOp.getMatchingIndexingMap(linalgOp.getDpsInitOperand(0)); + SmallVector outputExpr; + for (auto &[idx, expr] : llvm::enumerate(oldOutputMap.getResults())) { + if (static_cast(idx) == insertSplitDimension) { + outputExpr.push_back(b.getAffineDimExpr(reductionDims[0])); + } + outputExpr.push_back(expr); + } + if (insertSplitDimension == oldOutputMap.getNumResults()) + outputExpr.push_back(b.getAffineDimExpr(reductionDims[0])); + + // Step 1: Extract a slice of the input operands. + SmallVector valuesToTile = linalgOp.getDpsInputOperands(); + SmallVector tiledOperands = + makeTiledShapes(b, loc, op, valuesToTile, offsets, sizes, {}, true); + + // Step 2: Extract the accumulator operands + SmallVector strides(offsets.size(), b.getIndexAttr(1)); + SmallVector outOffsets(offsets.size(), b.getIndexAttr(0)); + // TODO: use SubsetExtractOpInterface once it is available. + Value out = b.create(loc, init[0], outOffsets, + sizes, strides); + + // Step3. create a generic op where the reduction dimension is replaced by a + // parallel dimension of the size of reduction. 
+ SmallVector newIteratorTypes = linalgOp.getIteratorTypesArray(); + newIteratorTypes[reductionDims[0]] = getParallelIteratorTypeName(); + SmallVector newMaps = linalgOp.getIndexingMapsArray(); + newMaps.back() = AffineMap::get(newMaps.back().getNumDims(), 0, outputExpr, + linalgOp.getContext()); + auto genericOp = + b.create(loc, TypeRange({out.getType()}), tiledOperands, + ValueRange({out}), newMaps, newIteratorTypes); + BlockAndValueMapping mapping; + op->getRegion(0).cloneInto(&genericOp.getRegion(), + genericOp.getRegion().begin(), mapping); + return genericOp.getOperation(); + } + + Operation *mergeReductions(Operation *op, OpBuilder &b, Location loc, + ValueRange partialReduce, + ArrayRef reductionDims) const { + auto linalgOp = cast(op); + assert(reductionDims.size() == 1 && + "only support single reduction right now."); + int64_t dimToMerge = reductionDims[0]; + + // Then create a new reduction that only reduce the newly added dimension + // from the previous op. + int64_t intermRank = + partialReduce[0].getType().cast().getRank(); + AffineMap inputMap = b.getMultiDimIdentityMap(intermRank); + SmallVector reductionIteratorTypes; + SmallVector exprs; + for (int64_t i : llvm::seq(0, intermRank)) { + if (dimToMerge == i) { + reductionIteratorTypes.push_back(getReductionIteratorTypeName()); + } else { + exprs.push_back(b.getAffineDimExpr(i)); + reductionIteratorTypes.push_back(getParallelIteratorTypeName()); + } + } + AffineMap outputMap = + AffineMap::get(intermRank, 0, exprs, op->getContext()); + SmallVector reductionMaps = {inputMap, outputMap}; + + SmallVector combinerOps; + matchReduction(linalgOp.getRegionOutputArgs(), 0, combinerOps); + Operation *reductionOp = combinerOps[0]; + + auto reduction = b.create( + loc, op->getResultTypes(), ValueRange({partialReduce[0]}), + SmallVector{linalgOp.getDpsInitOperands()}, reductionMaps, + reductionIteratorTypes, + [reductionOp](OpBuilder &b, Location loc, ValueRange inputs) { + Operation *clonedReductionOp = 
b.clone(*reductionOp); + clonedReductionOp->setOperand(0, inputs[0]); + clonedReductionOp->setOperand(1, inputs[1]); + b.create(loc, clonedReductionOp->getResult(0)); + }); + return reduction.getOperation(); + } +}; + } // namespace template static void registerOne(MLIRContext *ctx) { OpType::template attachInterface>(*ctx); + OpType::template attachInterface>( + *ctx); } /// Variadic helper function. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index d565efb30241d..cedec72b9cb33 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1465,7 +1465,7 @@ struct Conv1DGenerator : public StructuredGenerator { return; for (Value operand : mulOp->getOperands()) { if (Operation *def = operand.getDefiningOp()) { - if (!isa(def)) + if (!isa(def)) return; operand = def->getOperand(0); } diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index ce15c6767b24b..ccf7cdc4aadbc 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -948,13 +948,14 @@ computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp, SmallVector subShapeSizes = computeTileSizes(builder, loc, tileSizes, sizeBounds); - assert(static_cast(valuesToTile.size()) == + assert(static_cast(valuesToTile.size()) <= linalgOp->getNumOperands() && - "expected one value to tile for every operand"); + "more value to tile than operands."); SmallVector> allSliceParams; allSliceParams.reserve(valuesToTile.size()); - for (OpOperand &opOperand : linalgOp->getOpOperands()) { - Value shapedOp = valuesToTile[opOperand.getOperandNumber()]; + for (auto [opOperand, val] : + llvm::zip(linalgOp->getOpOperands(), valuesToTile)) { + Value shapedOp = val; LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp); AffineMap map = 
linalgOp.getMatchingIndexingMap(&opOperand); // Use `opOperand` as is if it is not tiled and not an output tensor. Having @@ -1059,5 +1060,37 @@ getReassociationMapForFoldingUnitDims(ArrayRef mixedSizes) { return reassociation; } +/// Return the identity numeric value associated to the give op. +Optional getNeutralElement(Operation *op) { + // Builder only used as helper for attribute creation. + OpBuilder b(op->getContext()); + Type resultType = op->getResult(0).getType(); + if (auto floatType = resultType.dyn_cast()) { + const llvm::fltSemantics &semantic = floatType.getFloatSemantics(); + if (isa(op)) + return b.getFloatAttr(resultType, llvm::APFloat::getZero(semantic)); + if (isa(op)) + return b.getFloatAttr(resultType, llvm::APFloat(semantic, 1)); + if (isa(op)) + return b.getFloatAttr(resultType, + llvm::APFloat::getInf(semantic, /*Negative=*/true)); + if (isa(op)) + return b.getFloatAttr( + resultType, llvm::APFloat::getInf(semantic, /*Negative=*/false)); + return Attribute(); + } + if (isa(op)) + return b.getIntegerAttr(resultType, 0); + if (isa(op)) + return b.getIntegerAttr(resultType, -1); + if (isa(op)) + return b.getIntegerAttr(resultType, std::numeric_limits::min()); + if (isa(op)) + return b.getIntegerAttr(resultType, std::numeric_limits::max()); + if (isa(op)) + return b.getIntegerAttr(resultType, 1); + return llvm::None; +} + } // namespace linalg } // namespace mlir diff --git a/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp b/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp index bea939a65022a..a1e6746b8fe9b 100644 --- a/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp +++ b/mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp @@ -109,6 +109,15 @@ PowFStrengthReduction::matchAndRewrite(math::PowFOp op, return success(); } + // Replace `pow(x, 0.75)` with `sqrt(sqrt(x)) * sqrt(x)`. 
+ if (isExponentValue(0.75)) { + Value pow_half = rewriter.create(op.getLoc(), x); + Value pow_quarter = rewriter.create(op.getLoc(), pow_half); + rewriter.replaceOpWithNewOp( + op, ValueRange{pow_half, pow_quarter}); + return success(); + } + return failure(); } diff --git a/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp b/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp index 9ed04b45aa1c8..24f70cb986e23 100644 --- a/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp +++ b/mlir/lib/Dialect/NVGPU/IR/NVGPUDialect.cpp @@ -13,9 +13,11 @@ #include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/TypeUtilities.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; @@ -80,13 +82,21 @@ void MmaSyncOp::build(::mlir::OpBuilder &odsBuilder, mmaShape, UnitAttr()); } -LogicalResult MmaSyncOp::verify() { - - // Fundamental tensor core mma.sync op - // For F32 (TF32), F16, S8, and S4 data types fundamental tensor core - // operation is of shape: 8-by-8-by-128b. F64 is an exception. The - // verification for mma.sync covering various shapes and data types is based - // on the fundamental tensor core operionation. +/// Performs verification for MmaSyncOp and MmaSparseSyncOp. +static LogicalResult verifyMmaSyncOp(Operation *op, + TypedValue matrixA, + TypedValue matrixB, + TypedValue matrixC, + const std::array &mmaShape, + bool tf32Enabled, bool sparse = false) { + + // The verification for mma.sync covering various shapes and data types is + // based on the fundamental tensor core shape. + + // "Fundamental" tensor core shapes: + // - For F32 (TF32), F16, S8, and S4 data + // types the fundamental tensor core operation is of shape 8-by-8-by-128b. + // - F64 is an exception and is of shape 8-by-8-by-256b. 
constexpr int kThreads = 32; // 32 threads per warp int64_t shapeM = 8; int64_t shapeN = 8; @@ -98,9 +108,9 @@ LogicalResult MmaSyncOp::verify() { int64_t numElementC{2}; // two accumulator elements per fundamental tile // nvgpu.mma.sync vector operands (per thread) - auto aVector = getMatrixA().getType().cast(); - auto bVector = getMatrixB().getType().cast(); - auto cVector = getMatrixC().getType().cast(); + auto aVector = matrixA.getType(); + auto bVector = matrixB.getType(); + auto cVector = matrixC.getType(); // vector shapes ArrayRef aShape = aVector.getShape(); @@ -110,13 +120,9 @@ LogicalResult MmaSyncOp::verify() { // vector element type Type aType = aVector.getElementType(); - // tensor float32 (TF32) enabled - bool tf32Enabled = getOperation()->hasAttr(getTf32EnabledAttrName()); - - // nvgpu.mma.sync shape (per 32 threads or per warp) - int64_t m = getMmaShape()[0].cast().getInt(); - int64_t n = getMmaShape()[1].cast().getInt(); - int64_t k = getMmaShape()[2].cast().getInt(); + // Certain data types are not allowed in sparse mode. 
+ if (sparse && aType.isF64()) + return op->emitError() << "f64 is not supported for sparse mode"; if (aType.isF64()) { // exception to 8-by-8-128b fundamental tensor core tile size @@ -127,36 +133,43 @@ LogicalResult MmaSyncOp::verify() { aType.isInteger(8) || aType.isInteger(4)) { // 8-by-8-128b fundamental tensor core tile size int operandBitwidth = aType.getIntOrFloatBitWidth(); - shapeK = 128 / operandBitwidth; // 128b wide shapeK + shapeK = 128 / operandBitwidth; // 128b wide shapeK + numElementA = 32 / operandBitwidth; // 32b wide operand A numElementB = 32 / operandBitwidth; // 32b wide operand B } else { - return emitError() << "expected input data type (i4,i8,f16,bf16,tf32,f64) " - "supported by nvgpu.mma.sync"; + return op->emitError() + << "expected input data type (i4,i8,f16,bf16,tf32,f64) " + "supported by " + << op->getName(); } // // Basic verification // + auto [m, n, k] = mmaShape; + // verify warp-wide size for vector a - if (aShape[0] * aShape[1] * kThreads != m * k) - return emitOpError() << "expected " << m * k - << " warp-wide matrix A elements"; + int64_t sparseFactor = sparse ? 
2 : 1; + if (aShape[0] * aShape[1] * kThreads != m * k / sparseFactor) + return op->emitOpError() + << "expected " << m * k << " warp-wide matrix A elements"; // verify warp-wide size for vector b if (bShape[0] * bShape[1] * kThreads != k * n) - return emitOpError() << "expected " << k * n - << " warp-wide matrix B elements"; + return op->emitOpError() + << "expected " << k * n << " warp-wide matrix B elements"; // verify warp-wide size for vector c if (cShape[0] * cShape[1] * kThreads != m * n) - return emitOpError() << "expected " << m * n - << " warp-wide matrix C elements"; + return op->emitOpError() + << "expected " << m * n << " warp-wide matrix C elements"; // verify tf32 tensor cores are enabled for only F32 datatype if (tf32Enabled && !(aType.isF32())) - return emitOpError() << "expected tf32 tensor cores only for F32 operands"; + return op->emitOpError() + << "expected tf32 tensor cores only for F32 operands"; // // Extended verification @@ -168,23 +181,48 @@ LogicalResult MmaSyncOp::verify() { int64_t kTile = k / shapeK; // verify shape of aVector - if ((aShape[0] != mTile * kTile) || (aShape[1] != numElementA)) - return emitOpError() << "expected matrix A to be shaped (" << mTile * kTile - << " x " << numElementA << ")"; + if ((aShape[0] != mTile * kTile / (sparse ? 
2 : 1)) || + (aShape[1] != numElementA)) + return op->emitOpError() << "expected matrix A to be shaped (" + << mTile * kTile << " x " << numElementA << ")"; // verify shape of bVector if ((bShape[0] != kTile * nTile) || (bShape[1] != numElementB)) - return emitOpError() << "expected matrix B to be shaped (" << kTile * nTile - << " x " << numElementB << ")"; + return op->emitOpError() << "expected matrix B to be shaped (" + << kTile * nTile << " x " << numElementB << ")"; // verify shape of cVector if ((cShape[0] != mTile * nTile) || (cShape[1] != numElementC)) - return emitOpError() << "expected matrix C to be shaped (" << mTile * nTile - << " x " << numElementC << ")"; + return op->emitOpError() << "expected matrix C to be shaped (" + << mTile * nTile << " x " << numElementC << ")"; return success(); } +LogicalResult MmaSyncOp::verify() { + return verifyMmaSyncOp(this->getOperation(), getMatrixA(), getMatrixB(), + getMatrixC(), getMmaShapeAsArray(), + getOperation()->hasAttr(getTf32EnabledAttrName())); +} + +//===----------------------------------------------------------------------===// +// NVGPU_MmaSparseSyncOp +//===----------------------------------------------------------------------===// +void MmaSparseSyncOp::build(::mlir::OpBuilder &odsBuilder, + ::mlir::OperationState &odsState, Value matrixA, + Value matrixB, Value matrixC, Value sparseMetadata, + ArrayRef mmaShape) { + build(odsBuilder, odsState, matrixC.getType(), matrixA, matrixB, matrixC, + sparseMetadata, odsBuilder.getI64ArrayAttr(mmaShape), 0, UnitAttr()); +} + +LogicalResult MmaSparseSyncOp::verify() { + return verifyMmaSyncOp(this->getOperation(), getMatrixA(), getMatrixB(), + getMatrixC(), getMmaShapeAsArray(), + getOperation()->hasAttr(getTf32EnabledAttrName()), + true); +} + //===----------------------------------------------------------------------===// // NVGPU_LdMatrixOp //===----------------------------------------------------------------------===// diff --git 
a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 684b911f55d45..3f65c1ce0107f 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -1373,7 +1373,7 @@ LogicalResult PerformConcurrentlyOp::verify() { // Verify that inserts are into out block arguments. Value dest = cast(op).getDest(); ArrayRef regionOutArgs = foreachThreadOp.getRegionOutArgs(); - if (llvm::find(regionOutArgs, dest) == regionOutArgs.end()) + if (!llvm::is_contained(regionOutArgs, dest)) return op.emitOpError("may only insert into an output block argument"); } return success(); diff --git a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp index c4c219617b782..ac3d76d569228 100644 --- a/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp @@ -32,22 +32,82 @@ static void unpackUnrealizedConversionCast(Value v, unpacked.push_back(v); } -class ConvertForOpTypes : public OpConversionPattern { +// CRTP +// A base class that takes care of 1:N type conversion, which maps the converted +// op results (computed by the derived class) and materializes 1:N conversion. +template +class Structural1ToNConversionPattern : public OpConversionPattern { public: - using OpConversionPattern::OpConversionPattern; + using OpConversionPattern::typeConverter; + using OpConversionPattern::OpConversionPattern; + using OpAdaptor = typename OpConversionPattern::OpAdaptor; + + // + // Derived classes should provide the following method which performs the + // actual conversion. It should return llvm::None upon conversion failure and + // return the converted operation upon success. 
+ // + // Optional convertSourceOp(SourceOp op, OpAdaptor adaptor, + // ConversionPatternRewriter &rewriter, + // TypeRange dstTypes) const; + LogicalResult - matchAndRewrite(ForOp op, OpAdaptor adaptor, + matchAndRewrite(SourceOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - SmallVector newResultTypes; + SmallVector dstTypes; SmallVector offsets; offsets.push_back(0); // Do the type conversion and record the offsets. for (Type type : op.getResultTypes()) { - if (failed(typeConverter->convertTypes(type, newResultTypes))) - return rewriter.notifyMatchFailure(op, "could not convert result"); - offsets.push_back(newResultTypes.size()); + if (failed(typeConverter->convertTypes(type, dstTypes))) + return rewriter.notifyMatchFailure(op, "could not convert result type"); + offsets.push_back(dstTypes.size()); } + // Calls the actual converter implementation to convert the operation. + Optional newOp = + static_cast(this)->convertSourceOp( + op, adaptor, rewriter, dstTypes); + + if (!newOp) + return rewriter.notifyMatchFailure(op, "could not convert operation"); + + // Packs the return value. + SmallVector packedRets; + for (unsigned i = 1, e = offsets.size(); i < e; i++) { + unsigned start = offsets[i - 1], end = offsets[i]; + unsigned len = end - start; + ValueRange mappedValue = newOp->getResults().slice(start, len); + if (len != 1) { + // 1 : N type conversion. + Type origType = op.getResultTypes()[i - 1]; + Value mat = typeConverter->materializeSourceConversion( + rewriter, op.getLoc(), origType, mappedValue); + if (!mat) { + return rewriter.notifyMatchFailure( + op, "Failed to materialize 1:N type conversion"); + } + packedRets.push_back(mat); + } else { + // 1 : 1 type conversion. 
+ packedRets.push_back(mappedValue.front()); + } + } + + rewriter.replaceOp(op, packedRets); + return success(); + } +}; + +class ConvertForOpTypes + : public Structural1ToNConversionPattern { +public: + using Structural1ToNConversionPattern::Structural1ToNConversionPattern; + + // The callback required by CRTP. + Optional convertSourceOp(ForOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter, + TypeRange dstTypes) const { // Create a empty new op and inline the regions from the old op. // // This is a little bit tricky. We have two concerns here: @@ -67,15 +127,15 @@ class ConvertForOpTypes : public OpConversionPattern { // convertRegionTypes already takes care of 1:N conversion. if (failed(rewriter.convertRegionTypes(&op.getLoopBody(), *typeConverter))) - return failure(); + return llvm::None; // Unpacked the iteration arguments. SmallVector flatArgs; for (Value arg : adaptor.getInitArgs()) unpackUnrealizedConversionCast(arg, flatArgs); - // We can not do clone as the number of result types after conversion might - // be different. + // We can not do clone as the number of result types after conversion + // might be different. ForOp newOp = rewriter.create(op.getLoc(), adaptor.getLowerBound(), adaptor.getUpperBound(), adaptor.getStep(), flatArgs); @@ -89,72 +149,63 @@ class ConvertForOpTypes : public OpConversionPattern { rewriter.inlineRegionBefore(op.getLoopBody(), newOp.getLoopBody(), newOp.getLoopBody().end()); - // Pack the return value. - SmallVector packedRets; - for (unsigned i = 1, e = offsets.size(); i < e; i++) { - unsigned start = offsets[i - 1], end = offsets[i]; - unsigned len = end - start; - ValueRange mappedValue = newOp.getResults().slice(start, len); - if (len != 1) { - // 1 : N type conversion. 
- Type origType = op.getResultTypes()[i - 1]; - Value mat = typeConverter->materializeSourceConversion( - rewriter, op.getLoc(), origType, mappedValue); - if (!mat) - return rewriter.notifyMatchFailure( - op, "Failed to materialize 1:N type conversion"); - packedRets.push_back(mat); - } else { - // 1 : 1 type conversion. - packedRets.push_back(mappedValue.front()); - } - } - - rewriter.replaceOp(op, packedRets); - return success(); + return newOp; } }; } // namespace namespace { -class ConvertIfOpTypes : public OpConversionPattern { +class ConvertIfOpTypes + : public Structural1ToNConversionPattern { public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(IfOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - // TODO: Generalize this to any type conversion, not just 1:1. - // - // We need to implement something more sophisticated here that tracks - // which types convert to which other types and does the appropriate - // materialization logic. - // For example, it's possible that one result type converts to 0 types and - // another to 2 types, so newResultTypes would at least be the right size - // to not crash in the llvm::zip call below, but then we would set the the - // wrong type on the SSA values! These edge cases are also why we cannot - // safely use the TypeConverter::convertTypes helper here. 
- SmallVector newResultTypes; - for (auto type : op.getResultTypes()) { - Type newType = typeConverter->convertType(type); - if (!newType) - return rewriter.notifyMatchFailure(op, "not a 1:1 type conversion"); - newResultTypes.push_back(newType); - } + using Structural1ToNConversionPattern::Structural1ToNConversionPattern; + + Optional convertSourceOp(IfOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter, + TypeRange dstTypes) const { + + IfOp newOp = rewriter.create(op.getLoc(), dstTypes, + adaptor.getCondition(), true); + newOp->setAttrs(op->getAttrs()); + + // We do not need the empty blocks created by rewriter. + rewriter.eraseBlock(newOp.elseBlock()); + rewriter.eraseBlock(newOp.thenBlock()); - // See comments in the ForOp pattern for why we clone without regions and - // then inline. - IfOp newOp = cast(rewriter.cloneWithoutRegions(*op.getOperation())); + // Inlines block from the original operation. rewriter.inlineRegionBefore(op.getThenRegion(), newOp.getThenRegion(), newOp.getThenRegion().end()); rewriter.inlineRegionBefore(op.getElseRegion(), newOp.getElseRegion(), newOp.getElseRegion().end()); - // Update the operands and types. - newOp->setOperands(adaptor.getOperands()); - for (auto t : llvm::zip(newOp.getResults(), newResultTypes)) - std::get<0>(t).setType(std::get<1>(t)); - rewriter.replaceOp(op, newOp.getResults()); - return success(); + return newOp; + } +}; +} // namespace + +namespace { +class ConvertWhileOpTypes + : public Structural1ToNConversionPattern { +public: + using Structural1ToNConversionPattern::Structural1ToNConversionPattern; + + Optional convertSourceOp(WhileOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter, + TypeRange dstTypes) const { + // Unpacked the iteration arguments. 
+ SmallVector flatArgs; + for (Value arg : adaptor.getOperands()) + unpackUnrealizedConversionCast(arg, flatArgs); + + auto newOp = rewriter.create(op.getLoc(), dstTypes, flatArgs); + + for (auto i : {0u, 1u}) { + if (failed(rewriter.convertRegionTypes(&op.getRegion(i), *typeConverter))) + return llvm::None; + auto &dstRegion = newOp.getRegion(i); + rewriter.inlineRegionBefore(op.getRegion(i), dstRegion, dstRegion.end()); + } + return newOp; } }; } // namespace @@ -179,34 +230,6 @@ class ConvertYieldOpTypes : public OpConversionPattern { }; } // namespace -namespace { -class ConvertWhileOpTypes : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(WhileOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - auto *converter = getTypeConverter(); - assert(converter); - SmallVector newResultTypes; - if (failed(converter->convertTypes(op.getResultTypes(), newResultTypes))) - return failure(); - - auto newOp = rewriter.create(op.getLoc(), newResultTypes, - adaptor.getOperands()); - for (auto i : {0u, 1u}) { - auto &dstRegion = newOp.getRegion(i); - rewriter.inlineRegionBefore(op.getRegion(i), dstRegion, dstRegion.end()); - if (failed(rewriter.convertRegionTypes(&dstRegion, *converter))) - return rewriter.notifyMatchFailure(op, "could not convert body types"); - } - rewriter.replaceOp(op, newOp.getResults()); - return success(); - } -}; -} // namespace - namespace { class ConvertConditionOpTypes : public OpConversionPattern { public: @@ -214,8 +237,11 @@ class ConvertConditionOpTypes : public OpConversionPattern { LogicalResult matchAndRewrite(ConditionOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - rewriter.updateRootInPlace( - op, [&]() { op->setOperands(adaptor.getOperands()); }); + SmallVector unpackedYield; + for (Value operand : adaptor.getOperands()) + unpackUnrealizedConversionCast(operand, unpackedYield); + + 
rewriter.updateRootInPlace(op, [&]() { op->setOperands(unpackedYield); }); return success(); } }; diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp index 2d6edb7332ac8..6e59bdb09b12d 100644 --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -360,11 +360,7 @@ mlir::scf::tileUsingSCFForOp(RewriterBase &rewriter, TilingInterface op, tilingResult.loops.back().getBody()->getTerminator()); SmallVector tiledImplementation = op.getTiledImplementation(rewriter, offsets, sizes); - if (tiledImplementation.size() != 1) { - return rewriter.notifyMatchFailure( - op, "expected tiled implementation to return a single op"); - } - tilingResult.tiledOp = tiledImplementation[0]; + tilingResult.tiledOps.append(tiledImplementation); if (op->getNumResults() == 0) { // nothing more to do. return tilingResult; @@ -396,13 +392,13 @@ mlir::scf::tileUsingSCFForOp(RewriterBase &rewriter, TilingInterface op, } FailureOr> replacementOr = yieldTiledValues( - rewriter, destinationTensors, tilingResult.tiledOp->getResults(), + rewriter, destinationTensors, tilingResult.tiledOps.back()->getResults(), resultOffsetsList, resultSizesList, tilingResult.loops); if (failed(replacementOr)) return rewriter.notifyMatchFailure(op, "failed to yield replacement"); if (auto dstOp = - dyn_cast(tilingResult.tiledOp)) { + dyn_cast(tilingResult.tiledOps.back())) { auto innerMostLoop = tilingResult.loops.back(); SmallVector destinationTensors = dstOp.getDpsInitOperands(); assert(destinationTensors.size() == @@ -424,6 +420,90 @@ mlir::scf::tileUsingSCFForOp(RewriterBase &rewriter, TilingInterface op, return tilingResult; } +FailureOr +mlir::scf::tileReductionUsingScf(PatternRewriter &b, + PartialReductionOpInterface op, + ArrayRef tileSize) { + Location loc = op.getLoc(); + // Ops implementing PartialReductionOpInterface are expected to implement + // TilingInterface. 
+ auto tilingInterfaceOp = cast(op.getOperation()); + SmallVector iterationDomain = tilingInterfaceOp.getIterationDomain(b); + SmallVector tileSizeVector = + getValueOrCreateConstantIndexOp(b, loc, tileSize); + if (tileSizeVector.size() < iterationDomain.size()) { + auto zero = b.create(loc, 0); + tileSizeVector.append(iterationDomain.size() - tileSizeVector.size(), zero); + } + if (op->getNumResults() != 1) + return b.notifyMatchFailure( + op, "don't support ops with multiple results for now"); + SmallVector iterators = + tilingInterfaceOp.getLoopIteratorTypes(); + int64_t numReductionDims = llvm::count( + tilingInterfaceOp.getLoopIteratorTypes(), utils::IteratorType::reduction); + if (numReductionDims != 1) + return b.notifyMatchFailure( + op, "only support ops with one reduction dimension."); + int reductionDim; + for (auto &[idx, iteratorType] : + llvm::enumerate(tilingInterfaceOp.getLoopIteratorTypes())) { + if (iteratorType == utils::IteratorType::reduction) { + reductionDim = idx; + break; + } + } + // 1. create the inital tensor value. + FailureOr identityTensor = + op.generateInitialTensorForPartialReduction(b, loc, tileSize, + reductionDim); + if (failed(identityTensor)) + return b.notifyMatchFailure(op, + "cannot create a tensor of identity value."); + // 2. Create the nested loops. + SmallVector offsets, sizes; + SmallVector loops = generateTileLoopNest( + b, loc, iterationDomain, tileSizeVector, offsets, sizes); + + // 3. Generate the tiled implementation within the inner most loop. 
+ b.setInsertionPoint(loops.back().getBody()->getTerminator()); + Operation *parallelOp = + op.tileToPartialReduction(b, loc, identityTensor.value()->getResults(), + offsets, sizes, reductionDim); + + SmallVector resultSizesList; + for (size_t i = 0; i < offsets.size(); i++) + resultSizesList.push_back( + b.createOrFold(loc, parallelOp->getResult(0), i)); + SmallVector outOffsets(offsets.size(), b.getIndexAttr(0)); + FailureOr> replacementOr = yieldTiledValues( + b, identityTensor.value()->getResults(), parallelOp->getResults(), + outOffsets, resultSizesList, loops); + if (failed(replacementOr)) + return b.notifyMatchFailure(op, "failed to yield replacement"); + + auto dstOp = cast(parallelOp); + auto innerMostLoop = loops.back(); + SmallVector destinationTensors = dstOp.getDpsInitOperands(); + assert(destinationTensors.size() == + innerMostLoop.getRegionIterArgs().size() && + "unexpected number of outputs"); + updateDestinationOperandsForTiledOp(b, destinationTensors, + innerMostLoop.getRegionIterArgs()); + + // 4. Apply the merge reduction to combine all the partial values. + b.setInsertionPointAfter(*loops.begin()); + Operation *mergeOp = + op.mergeReductions(b, loc, replacementOr.value(), reductionDim); + b.replaceOp(op, mergeOp->getResults()); + + SCFReductionTilingResult results; + results.initialOp = identityTensor.value(); + results.loops = std::move(loops); + results.parallelTiledOp = parallelOp; + results.mergeOp = mergeOp; + return results; +} //===----------------------------------------------------------------------===// // tileConsumerAndFuseProducerGreedilyUsingSCFForOp implementation. 
//===----------------------------------------------------------------------===// @@ -470,13 +550,14 @@ mlir::scf::tileConsumerAndFuseProducerGreedilyUsingSCFForOp( tileUsingSCFForOp(rewriter, consumer, options.tilingOptions); if (failed(tilingResult)) return rewriter.notifyMatchFailure(consumer, "failed to tile consumer"); - tileAndFuseResult.tiledAndFusedOps.insert(tilingResult->tiledOp); + for (auto tiledOp : tilingResult->tiledOps) + tileAndFuseResult.tiledAndFusedOps.insert(tiledOp); tileAndFuseResult.loops = std::move(tilingResult->loops); for (const auto &result : llvm::enumerate( llvm::zip(consumer->getResults(), tilingResult->replacements))) { tileAndFuseResult.replacements[std::get<0>(result.value())] = std::get<1>(result.value()); - yieldedValueToResultNumber[tilingResult->tiledOp->getResult( + yieldedValueToResultNumber[tilingResult->tiledOps.back()->getResult( result.index())] = result.index(); } } diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp index b3444d8b210a6..b068d23f0e9f0 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp @@ -116,9 +116,23 @@ void spirv::AccessChainOp::getCanonicalizationPatterns( // spirv.BitcastOp //===----------------------------------------------------------------------===// -void spirv::BitcastOp::getCanonicalizationPatterns(RewritePatternSet &results, - MLIRContext *context) { - results.add(context); +OpFoldResult spirv::BitcastOp::fold(ArrayRef /*operands*/) { + Value curInput = getOperand(); + if (getType() == curInput.getType()) + return curInput; + + // Look through nested bitcasts. + if (auto prevCast = curInput.getDefiningOp()) { + Value prevInput = prevCast.getOperand(); + if (prevInput.getType() == getType()) + return prevInput; + + getOperandMutable().assign(prevInput); + return getResult(); + } + + // TODO(kuhar): Consider constant-folding the operand attribute. 
+ return {}; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td index 12c41fcaf0f00..e8d2274d29aa0 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.td @@ -13,13 +13,6 @@ include "mlir/IR/PatternBase.td" include "mlir/Dialect/SPIRV/IR/SPIRVOps.td" -//===----------------------------------------------------------------------===// -// spirv.Bitcast -//===----------------------------------------------------------------------===// - -def ConvertChainedBitcast : Pat<(SPIRV_BitcastOp (SPIRV_BitcastOp $operand)), - (SPIRV_BitcastOp $operand)>; - //===----------------------------------------------------------------------===// // spirv.LogicalNot //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp index 2514cfe0301a1..286ff0b7eff2d 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp @@ -220,9 +220,16 @@ static Type convertScalarType(const spirv::TargetEnv &targetEnv, // Otherwise we need to adjust the type, which really means adjusting the // bitwidth given this is a scalar type. + if (!options.emulateLT32BitScalarTypes) + return nullptr; - if (!options.emulateNon32BitScalarTypes) + // We only emulate narrower scalar types here and do not truncate results. 
+ if (type.getIntOrFloatBitWidth() > 32) { + LLVM_DEBUG(llvm::dbgs() + << type + << " not converted to 32-bit for SPIR-V to avoid truncation\n"); return nullptr; + } if (auto floatType = type.dyn_cast()) { LLVM_DEBUG(llvm::dbgs() << type << " converted to 32-bit for SPIR-V\n"); diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 133879b12b197..6a4177737df9f 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -581,11 +581,20 @@ LogicalResult CompressOp::verify() { void ForeachOp::build( OpBuilder &builder, OperationState &result, Value tensor, - function_ref bodyBuilder) { - build(builder, result, tensor); + function_ref + bodyBuilder) { + build(builder, result, tensor, llvm::None, bodyBuilder); +} + +void ForeachOp::build( + OpBuilder &builder, OperationState &result, Value tensor, + ValueRange initArgs, + function_ref + bodyBuilder) { + build(builder, result, initArgs.getTypes(), tensor, initArgs); + // Builds foreach body. if (!bodyBuilder) return; - auto rtp = tensor.getType().cast(); int64_t rank = rtp.getRank(); @@ -594,31 +603,49 @@ void ForeachOp::build( std::fill_n(std::back_inserter(blockArgTypes), rank, builder.getIndexType()); // Followed by one value. blockArgTypes.push_back(rtp.getElementType()); + // Followed by reduction variable. 
+ blockArgTypes.append(initArgs.getTypes().begin(), initArgs.getTypes().end()); SmallVector blockArgLocs; - std::fill_n(std::back_inserter(blockArgLocs), rank + 1, tensor.getLoc()); + std::fill_n(std::back_inserter(blockArgLocs), blockArgTypes.size(), + tensor.getLoc()); OpBuilder::InsertionGuard guard(builder); auto ®ion = *result.regions.front(); Block *bodyBlock = builder.createBlock(®ion, region.end(), blockArgTypes, blockArgLocs); - bodyBuilder(builder, result.location, bodyBlock->getArguments()); + bodyBuilder(builder, result.location, + bodyBlock->getArguments().slice(0, rank), + bodyBlock->getArguments()[rank], + bodyBlock->getArguments().drop_front(rank + 1)); } LogicalResult ForeachOp::verify() { auto t = getTensor().getType().cast(); auto args = getBody()->getArguments(); - if (static_cast(t.getRank()) + 1 != args.size()) + if (static_cast(t.getRank()) + 1 + getInitArgs().size() != + args.size()) return emitError("Unmatched number of arguments in the block"); + if (getNumResults() != getInitArgs().size()) + return emitError("Mismatch in number of init arguments and results"); + + if (getResultTypes() != getInitArgs().getTypes()) + return emitError("Mismatch in types of init arguments and results"); + + auto yield = cast(getBody()->getTerminator()); + if (yield.getNumOperands() != getNumResults() || + yield.getOperands().getTypes() != getResultTypes()) + return emitError("Mismatch in types of yield values and results"); + for (int64_t i = 0, e = t.getRank(); i < e; i++) if (args[i].getType() != IndexType::get(getContext())) emitError( llvm::formatv("Expecting Index type for argument at index {0}", i)); auto elemTp = t.getElementType(); - auto valueTp = args.back().getType(); + auto valueTp = args[t.getRank()].getType(); if (elemTp != valueTp) emitError(llvm::formatv("Unmatched element type between input tensor and " "block argument, expected:{0}, got: {1}", @@ -692,6 +719,42 @@ LogicalResult SortOp::verify() { return success(); } +LogicalResult 
SortCooOp::verify() { + auto cn = getN().getDefiningOp(); + // We can't check the size of the buffers when n or buffer dimensions aren't + // compile-time constants. + if (!cn) + return success(); + + uint64_t n = cn.value(); + uint64_t nx = 1; + if (auto nxAttr = getNxAttr()) { + nx = nxAttr.getInt(); + if (nx < 1) + emitError(llvm::formatv("Expected nx > 1, got {0}", nx)); + } + uint64_t ny = 0; + if (auto nyAttr = getNyAttr()) { + ny = nyAttr.getInt(); + } + + auto checkDim = [&](Value v, uint64_t min, const char *message) { + MemRefType tp = v.getType().cast(); + int64_t dim = tp.getShape()[0]; + if (dim != ShapedType::kDynamicSize && dim < (int64_t)min) { + emitError(llvm::formatv("{0} got {1} < {2}", message, dim, min)); + } + }; + + checkDim(getXy(), n * (nx + ny), "Expected dimension(xy) >= n * (nx + ny)"); + + for (Value opnd : getYs()) { + checkDim(opnd, n, "Expected dimension(y) >= n"); + } + + return success(); +} + LogicalResult YieldOp::verify() { // Check for compatible parent. auto *parentOp = (*this)->getParentOp(); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp index 1e9cadd13e156..fcddcd27ed40b 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp @@ -219,9 +219,12 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( OpBuilder &builder, Location loc, size_t tid, size_t dim, MutableArrayRef reduc, bool isParallel, ArrayRef extraTids, ArrayRef extraDims) { + assert(dimTypes[tid].size() > dim); // We can not re-enter the same level. assert(!coord[tid][dim]); + // TODO: support multiple return on parallel for? + assert(!isParallel || reduc.size() <= 1); Value step = constantIndex(builder, loc, 1); auto dimType = dimTypes[tid][dim]; @@ -232,11 +235,38 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( Value lo = isSparseInput ? 
pidxs[tid][dim] // current offset : loopSeqStack.back(); // univeral tid Value hi = highs[tid][dim]; + Operation *loop = nullptr; + Value iv; + if (isParallel) { + scf::ParallelOp parOp = + builder.create(loc, lo, hi, step, reduc); + builder.setInsertionPointToStart(parOp.getBody()); + assert(parOp.getNumReductions() == reduc.size()); + iv = parOp.getInductionVars()[0]; + + // In-place update on the reduction variable vector. + // Note that the init vals is not the actual reduction variables but instead + // used as a `special handle` to (temporarily) represent them. The + // expression on init vals will be moved into scf.reduce and replaced with + // the block arguments when exiting the loop (see exitForLoop). This is + // needed as we can not build the actual reduction block and get the actual + // reduction varaible before users fill parallel loop body. + for (int i = 0, e = reduc.size(); i < e; i++) + reduc[i] = parOp.getInitVals()[i]; + loop = parOp; + } else { + scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); + builder.setInsertionPointToStart(forOp.getBody()); + iv = forOp.getInductionVar(); + + // In-place update on the reduction variable vector. + assert(forOp.getNumRegionIterArgs() == reduc.size()); + for (int i = 0, e = reduc.size(); i < e; i++) + reduc[i] = forOp.getRegionIterArg(i); + loop = forOp; + } + assert(loop && iv); - scf::ForOp forOp = builder.create(loc, lo, hi, step, reduc); - builder.setInsertionPointToStart(forOp.getBody()); - Value iv = forOp.getInductionVar(); - assert(iv); if (isSparseInput) { pidxs[tid][dim] = iv; // Generating a load on the indices array yields the coordinate. @@ -253,16 +283,12 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim( // NOTE: we can also prepares for next dim here in advance // Push the loop into stack - loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), forOp, + loopStack.emplace_back(ArrayRef(tid), ArrayRef(dim), loop, coord[tid][dim]); // Emit extra locals. 
emitExtraLocalsForTensorsAtDenseDims(builder, loc, extraTids, extraDims); - // In-place update on the reduction variable vector. - assert(forOp.getNumRegionIterArgs() == reduc.size()); - for (int i = 0, e = reduc.size(); i < e; i++) - reduc[i] = forOp.getRegionIterArg(i); - return forOp; + return loop; } Operation *SparseTensorLoopEmitter::enterCoIterationOverTensorsAtDims( @@ -434,17 +460,73 @@ void SparseTensorLoopEmitter::emitExtraLocalsForTensorsAtDenseDims( } } -SmallVector -SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc) { LoopLevelInfo &loopInfo = loopStack.back(); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; - auto forOp = llvm::cast(loopInfo.loop); - if (!reduc.empty()) { - assert(reduc.size() == forOp.getNumResults()); - builder.setInsertionPointToEnd(forOp.getBody()); - builder.create(loc, reduc); + auto forOp = llvm::dyn_cast(loopInfo.loop); + if (forOp) { + if (!reduc.empty()) { + assert(reduc.size() == forOp.getNumResults()); + rewriter.setInsertionPointToEnd(forOp.getBody()); + rewriter.create(loc, reduc); + } + // Exit the loop. + rewriter.setInsertionPointAfter(forOp); + // In-place update reduction variables. + for (unsigned i = 0, e = forOp.getResults().size(); i < e; i++) + reduc[i] = forOp.getResult(i); + } else { + auto parOp = llvm::cast(loopInfo.loop); + if (!reduc.empty()) { + assert(reduc.size() == parOp.getInitVals().size() && reduc.size() == 1); + Operation *redExp = reduc.front().getDefiningOp(); + // Reduction expression should have no use. + assert(redExp->getUses().empty()); + // This must be a binary operation. + // NOTE: This is users' responsibilty to ensure the operation are + // commutative. 
+ assert(redExp->getNumOperands() == 2 && redExp->getNumResults() == 1); + + Value redVal = parOp.getInitVals().front(); + Value curVal; + if (redExp->getOperand(0) == redVal) + curVal = redExp->getOperand(1); + else if (redExp->getOperand(1) == redVal) + curVal = redExp->getOperand(0); + // One of the operands must be the init value (which is also the + // previous reduction value). + assert(curVal); + // The reduction expression should be the only user of the reduction val + // inside the parallel for. + unsigned numUsers = 0; + for (Operation *op : redVal.getUsers()) { + if (op->getParentOp() == parOp) + numUsers++; + } + assert(numUsers == 1); + (void)numUsers; // to silence unused variable warning in release build + + rewriter.setInsertionPointAfter(redExp); + auto redOp = rewriter.create(loc, curVal); + // Attach to the reduction op. + Block *redBlock = &redOp.getRegion().getBlocks().front(); + rewriter.setInsertionPointToEnd(redBlock); + Operation *newRed = rewriter.clone(*redExp); + // Replaces arguments of the reduction expression by using the block + // arguments from scf.reduce. + rewriter.updateRootInPlace( + newRed, [&]() { newRed->setOperands(redBlock->getArguments()); }); + // Erases the out-dated reduction expression. + rewriter.eraseOp(redExp); + rewriter.setInsertionPointToEnd(redBlock); + rewriter.create(loc, newRed->getResult(0)); + } + rewriter.setInsertionPointAfter(parOp); + // In-place update reduction variables. 
+ for (unsigned i = 0, e = parOp.getResults().size(); i < e; i++) + reduc[i] = parOp.getResult(i); } // Finished iterating a tensor, clean up @@ -458,14 +540,10 @@ SparseTensorLoopEmitter::exitForLoop(OpBuilder &builder, Location loc, if (!isDenseDLT(dimTypes[tid][dim])) highs[tid][dim] = Value(); } - // exit the loop - builder.setInsertionPointAfter(forOp); - return forOp.getResults(); } -SmallVector -SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitCoIterationLoop( + OpBuilder &builder, Location loc, MutableArrayRef reduc) { auto whileOp = llvm::cast(loopStack.back().loop); auto &dims = loopStack.back().dims; auto &tids = loopStack.back().tids; @@ -499,10 +577,10 @@ SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, } // Reduction value from users. - SmallVector ret; - for (auto red : reduc) { - operands.push_back(red); - ret.push_back(whileOp->getResult(o++)); + for (unsigned i = 0, e = reduc.size(); i < e; i++) { + operands.push_back(reduc[i]); + // In place update reduction variable. + reduc[i] = whileOp->getResult(o++); } // An (optional) universal index. @@ -517,26 +595,24 @@ SparseTensorLoopEmitter::exitCoiterationLoop(OpBuilder &builder, Location loc, assert(o == operands.size()); builder.create(loc, operands); builder.setInsertionPointAfter(whileOp); - return ret; } -SmallVector -SparseTensorLoopEmitter::exitCurrentLoop(OpBuilder &builder, Location loc, - ArrayRef reduc) { +void SparseTensorLoopEmitter::exitCurrentLoop(RewriterBase &rewriter, + Location loc, + MutableArrayRef reduc) { // Clean up the values, it would help use to discover potential bug at a // earlier stage (instead of silently using a wrong value). 
LoopLevelInfo &loopInfo = loopStack.back(); assert(loopInfo.tids.size() == loopInfo.dims.size()); SmallVector red; if (llvm::isa(loopInfo.loop)) { - red = exitCoiterationLoop(builder, loc, reduc); + exitCoIterationLoop(rewriter, loc, reduc); } else { - red = exitForLoop(builder, loc, reduc); + exitForLoop(rewriter, loc, reduc); } assert(loopStack.size() == loopSeqStack.size()); loopStack.pop_back(); - return red; } //===----------------------------------------------------------------------===// @@ -880,6 +956,9 @@ Value mlir::sparse_tensor::genValueForDense(OpBuilder &builder, Location loc, return val; } +// FIXME: +// 1. Dense tensors loop should be generated by loop emitter. +// 2. Support reduction variables to propagate SSA chains properly. void mlir::sparse_tensor::genDenseTensorOrSparseConstantIterLoop( OpBuilder &builder, Location loc, Value src, unsigned rank, function_ref bodyBuilder) { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h index 3228eb4c79cb2..a75d3920a4d55 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h @@ -380,8 +380,8 @@ class SparseTensorLoopEmitter { ArrayRef dims, bool needsUniv, MutableArrayRef reduc = {}, ArrayRef extraTids = {}, ArrayRef extraDims = {}); - SmallVector exitCurrentLoop(OpBuilder &builder, Location loc, - ArrayRef reduc = {}); + void exitCurrentLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc = {}); /// Returns the array of coordinate for all the loop generated till now. void getCoordinateArray(SmallVectorImpl &coords) const { @@ -452,17 +452,35 @@ class SparseTensorLoopEmitter { ArrayRef dims); /// Exits a for loop, returns the reduction results, e.g., + /// For sequential for loops: /// %ret = for () { /// ... 
+ /// %val = addi %args, %c /// yield %val /// } - /// Return %ret to user, while %val is provided by users (`reduc`) - SmallVector exitForLoop(OpBuilder &builder, Location loc, - ArrayRef reduc); + /// For parallel loops, the following generated code by users: + /// %ret = parallel () init(%args) { + /// ... + /// %val = op %args, %c + /// } + /// will be transformed into + /// %ret = parallel () init(%args) { + /// ... + /// scf.reduce(%c) bb0(%0, %1){ + /// %val = op %0, %1 + /// scf.reduce.return %val + /// } + /// } + /// NOTE: only one instruction will be moved into reduce block, transformation + /// will fail if multiple instructions are used to compute the reduction + /// value. + /// Return %ret to user, while %val is provided by users (`reduc`). + void exitForLoop(RewriterBase &rewriter, Location loc, + MutableArrayRef reduc); /// Exits a while loop, returns the reduction results. - SmallVector exitCoiterationLoop(OpBuilder &builder, Location loc, - ArrayRef reduc); + void exitCoIterationLoop(OpBuilder &builder, Location loc, + MutableArrayRef reduc); // Whether the loop emitter needs to treat the last tensor as the output // tensor. 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseBufferRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseBufferRewriting.cpp index 929d4a4ddf1f3..0af92a656d848 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseBufferRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseBufferRewriting.cpp @@ -33,8 +33,8 @@ static constexpr uint64_t loIdx = 0; static constexpr uint64_t hiIdx = 1; static constexpr uint64_t xStartIdx = 2; -static constexpr const char kMaySwapFuncNamePrefix[] = "_sparse_may_swap_"; static constexpr const char kLessThanFuncNamePrefix[] = "_sparse_less_than_"; +static constexpr const char kCompareEqFuncNamePrefix[] = "_sparse_compare_eq_"; static constexpr const char kPartitionFuncNamePrefix[] = "_sparse_partition_"; static constexpr const char kBinarySearchFuncNamePrefix[] = "_sparse_binary_search_"; @@ -90,11 +90,10 @@ getMangledSortHelperFunc(OpBuilder &builder, func::FuncOp insertPoint, return result; } -/// Creates a function for swapping the values in index i and j for all the +/// Creates a code block for swapping the values in index i and j for all the /// buffers. // -// The generate IR corresponds to this C like algorithm: -// if (i != j) { +// The generated IR corresponds to this C like algorithm: // swap(x0[i], x0[j]); // swap(x1[i], x1[j]); // ... @@ -102,36 +101,90 @@ getMangledSortHelperFunc(OpBuilder &builder, func::FuncOp insertPoint, // swap(y0[i], y0[j]); // ... // swap(yn[i], yn[j]); -// } -static void createMaySwapFunc(OpBuilder &builder, ModuleOp unused, - func::FuncOp func, size_t dim) { +static void createSwap(OpBuilder &builder, Location loc, ValueRange args) { + Value i = args[0]; + Value j = args[1]; + for (auto arg : args.drop_front(xStartIdx)) { + Value vi = builder.create(loc, arg, i); + Value vj = builder.create(loc, arg, j); + builder.create(loc, vj, arg, i); + builder.create(loc, vi, arg, j); + } +} + +/// Creates a function to compare all the (xs[i], xs[j]) pairs. 
The method to +/// compare each pair is create via `compareBuilder`. +static void createCompareFuncImplementation( + OpBuilder &builder, ModuleOp unused, func::FuncOp func, size_t dim, + function_ref + compareBuilder) { OpBuilder::InsertionGuard insertionGuard(builder); Block *entryBlock = func.addEntryBlock(); builder.setInsertionPointToStart(entryBlock); - Location loc = func.getLoc(); ValueRange args = entryBlock->getArguments(); - Value i = args[0]; - Value j = args[1]; + + scf::IfOp topIfOp; + for (const auto &item : llvm::enumerate(args.slice(xStartIdx, dim))) { + scf::IfOp ifOp = compareBuilder(builder, loc, args[0], args[1], + item.value(), (item.index() == dim - 1)); + if (item.index() == 0) { + topIfOp = ifOp; + } else { + OpBuilder::InsertionGuard insertionGuard(builder); + builder.setInsertionPointAfter(ifOp); + builder.create(loc, ifOp.getResult(0)); + } + } + + builder.setInsertionPointAfter(topIfOp); + builder.create(loc, topIfOp.getResult(0)); +} + +/// Generates an if-statement to compare whether x[i] is equal to x[j]. +static scf::IfOp createEqCompare(OpBuilder &builder, Location loc, Value i, + Value j, Value x, bool isLastDim) { + Value f = constantI1(builder, loc, false); + Value t = constantI1(builder, loc, true); + Value vi = builder.create(loc, x, i); + Value vj = builder.create(loc, x, j); + Value cond = - builder.create(loc, arith::CmpIPredicate::ne, i, j); - scf::IfOp ifOp = builder.create(loc, cond, /*else=*/false); + builder.create(loc, arith::CmpIPredicate::eq, vi, vj); + scf::IfOp ifOp = + builder.create(loc, f.getType(), cond, /*else=*/true); - // If i!=j swap values in the buffers. 
+ // x[1] != x[j]: + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, f); + + // x[i] == x[j]: builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - for (auto arg : args.drop_front(xStartIdx)) { - Value vi = builder.create(loc, arg, i); - Value vj = builder.create(loc, arg, j); - builder.create(loc, vj, arg, i); - builder.create(loc, vi, arg, j); + if (isLastDim == 1) { + // Finish checking all dimensions. + builder.create(loc, t); } - builder.setInsertionPointAfter(ifOp); - builder.create(loc); + return ifOp; +} + +/// Creates a function to compare whether xs[i] is equal to xs[j]. +// +// The generate IR corresponds to this C like algorithm: +// if (x0[i] != x0[j]) +// return false; +// else +// if (x1[i] != x1[j]) +// return false; +// else if (x2[2] != x2[j])) +// and so on ... +static void createEqCompareFunc(OpBuilder &builder, ModuleOp unused, + func::FuncOp func, size_t dim) { + createCompareFuncImplementation(builder, unused, func, dim, createEqCompare); } -/// Generates an if-statement to compare x[i] and x[j]. +/// Generates an if-statement to compare whether x[i] is less than x[j]. static scf::IfOp createLessThanCompare(OpBuilder &builder, Location loc, Value i, Value j, Value x, bool isLastDim) { @@ -172,8 +225,7 @@ static scf::IfOp createLessThanCompare(OpBuilder &builder, Location loc, return ifOp; } -/// Creates a function to compare the xs values in index i and j for all the -/// dimensions. The function returns true iff xs[i] < xs[j]. +/// Creates a function to compare whether xs[i] is less than xs[j]. // // The generate IR corresponds to this C like algorithm: // if (x0[i] < x0[j]) @@ -187,29 +239,8 @@ static scf::IfOp createLessThanCompare(OpBuilder &builder, Location loc, // and so on ... 
static void createLessThanFunc(OpBuilder &builder, ModuleOp unused, func::FuncOp func, size_t dim) { - OpBuilder::InsertionGuard insertionGuard(builder); - - Block *entryBlock = func.addEntryBlock(); - builder.setInsertionPointToStart(entryBlock); - Location loc = func.getLoc(); - ValueRange args = entryBlock->getArguments(); - - scf::IfOp topIfOp; - for (const auto &item : llvm::enumerate(args.slice(xStartIdx, dim))) { - scf::IfOp ifOp = - createLessThanCompare(builder, loc, args[0], args[1], item.value(), - (item.index() == dim - 1)); - if (item.index() == 0) { - topIfOp = ifOp; - } else { - OpBuilder::InsertionGuard insertionGuard(builder); - builder.setInsertionPointAfter(ifOp); - builder.create(loc, ifOp.getResult(0)); - } - } - - builder.setInsertionPointAfter(topIfOp); - builder.create(loc, topIfOp.getResult(0)); + createCompareFuncImplementation(builder, unused, func, dim, + createLessThanCompare); } /// Creates a function to use a binary search to find the insertion point for @@ -285,23 +316,94 @@ static void createBinarySearchFunc(OpBuilder &builder, ModuleOp module, builder.create(loc, whileOp.getResult(0)); } +/// Creates code to advance i in a loop based on xs[p] as follows: +/// while (xs[i] < xs[p]) i += step (step > 0) +/// or +/// while (xs[i] > xs[p]) i += step (step < 0) +/// The routine returns i as well as a boolean value to indicate whether +/// xs[i] == xs[p]. 
+static std::pair +createScanLoop(OpBuilder &builder, ModuleOp module, func::FuncOp func, + ValueRange xs, Value i, Value p, size_t dim, int step) { + Location loc = func.getLoc(); + scf::WhileOp whileOp = + builder.create(loc, TypeRange{i.getType()}, ValueRange{i}); + + Block *before = + builder.createBlock(&whileOp.getBefore(), {}, {i.getType()}, {loc}); + builder.setInsertionPointToEnd(before); + SmallVector compareOperands; + if (step > 0) { + compareOperands.push_back(before->getArgument(0)); + compareOperands.push_back(p); + } else { + assert(step < 0); + compareOperands.push_back(p); + compareOperands.push_back(before->getArgument(0)); + } + compareOperands.append(xs.begin(), xs.end()); + MLIRContext *context = module.getContext(); + Type i1Type = IntegerType::get(context, 1, IntegerType::Signless); + FlatSymbolRefAttr lessThanFunc = + getMangledSortHelperFunc(builder, func, {i1Type}, kLessThanFuncNamePrefix, + dim, compareOperands, createLessThanFunc); + Value cond = builder + .create(loc, lessThanFunc, TypeRange{i1Type}, + compareOperands) + .getResult(0); + builder.create(loc, cond, before->getArguments()); + + Block *after = + builder.createBlock(&whileOp.getAfter(), {}, {i.getType()}, {loc}); + builder.setInsertionPointToEnd(after); + Value cs = constantIndex(builder, loc, step); + i = builder.create(loc, after->getArgument(0), cs); + builder.create(loc, ValueRange{i}); + i = whileOp.getResult(0); + + builder.setInsertionPointAfter(whileOp); + compareOperands[0] = i; + compareOperands[1] = p; + FlatSymbolRefAttr compareEqFunc = getMangledSortHelperFunc( + builder, func, {i1Type}, kCompareEqFuncNamePrefix, dim, compareOperands, + createEqCompareFunc); + Value compareEq = + builder + .create(loc, compareEqFunc, TypeRange{i1Type}, + compareOperands) + .getResult(0); + + return std::make_pair(whileOp.getResult(0), compareEq); +} + /// Creates a function to perform quick sort partition on the values in the /// range of index [lo, hi), assuming lo < hi. 
// // The generated IR corresponds to this C like algorithm: -// int partition(lo, hi, data) { -// pivot = data[hi - 1]; -// i = (lo – 1) // RHS of the pivot found so far. -// for (j = lo; j < hi - 1; j++){ -// if (data[j] < pivot){ -// i++; -// swap data[i] and data[j] +// int partition(lo, hi, xs) { +// p = (lo+hi)/2 // pivot index +// i = lo +// j = hi-1 +// while (i < j) do { +// while (xs[i] < xs[p]) i ++; +// i_eq = (xs[i] == xs[p]); +// while (xs[j] > xs[p]) j --; +// j_eq = (xs[j] == xs[p]); +// if (i < j) { +// swap(xs[i], xs[j]) +// if (i == p) { +// p = j; +// } else if (j == p) { +// p = i; +// } +// if (i_eq && j_eq) { +// ++i; +// --j; +// } // } // } -// i++ -// swap data[i] and data[hi-1]) -// return i -// } +// return p +// } static void createPartitionFunc(OpBuilder &builder, ModuleOp module, func::FuncOp func, size_t dim) { OpBuilder::InsertionGuard insertionGuard(builder); @@ -309,60 +411,96 @@ static void createPartitionFunc(OpBuilder &builder, ModuleOp module, Block *entryBlock = func.addEntryBlock(); builder.setInsertionPointToStart(entryBlock); - MLIRContext *context = module.getContext(); Location loc = func.getLoc(); ValueRange args = entryBlock->getArguments(); Value lo = args[loIdx]; + Value hi = args[hiIdx]; + Value sum = builder.create(loc, lo, hi); Value c1 = constantIndex(builder, loc, 1); - Value i = builder.create(loc, lo, c1); - Value him1 = builder.create(loc, args[hiIdx], c1); - scf::ForOp forOp = - builder.create(loc, lo, him1, c1, ValueRange{i}); - - // Start the for-stmt body. 
- builder.setInsertionPointToStart(forOp.getBody()); - Value j = forOp.getInductionVar(); - SmallVector compareOperands{j, him1}; - ValueRange xs = args.slice(xStartIdx, dim); - compareOperands.append(xs.begin(), xs.end()); - Type i1Type = IntegerType::get(context, 1, IntegerType::Signless); - FlatSymbolRefAttr lessThanFunc = - getMangledSortHelperFunc(builder, func, {i1Type}, kLessThanFuncNamePrefix, - dim, compareOperands, createLessThanFunc); - Value cond = builder - .create(loc, lessThanFunc, TypeRange{i1Type}, - compareOperands) - .getResult(0); - scf::IfOp ifOp = - builder.create(loc, i.getType(), cond, /*else=*/true); + Value p = builder.create(loc, sum, c1); + + Value i = lo; + Value j = builder.create(loc, hi, c1); + SmallVector operands{i, j, p}; + SmallVector types{i.getType(), j.getType(), p.getType()}; + scf::WhileOp whileOp = builder.create(loc, types, operands); + + // The before-region of the WhileOp. + Block *before = + builder.createBlock(&whileOp.getBefore(), {}, types, {loc, loc, loc}); + builder.setInsertionPointToEnd(before); + Value cond = builder.create(loc, arith::CmpIPredicate::ult, + before->getArgument(0), + before->getArgument(1)); + builder.create(loc, cond, before->getArguments()); - // The if-stmt true branch: i++; swap(data[i], data[j]); yield i. + // The after-region of the WhileOp. 
+ Block *after = + builder.createBlock(&whileOp.getAfter(), {}, types, {loc, loc, loc}); + builder.setInsertionPointToEnd(after); + i = after->getArgument(0); + j = after->getArgument(1); + p = after->getArgument(2); + + auto [iresult, iCompareEq] = createScanLoop( + builder, module, func, args.slice(xStartIdx, dim), i, p, dim, 1); + i = iresult; + auto [jresult, jCompareEq] = createScanLoop( + builder, module, func, args.slice(xStartIdx, dim), j, p, dim, -1); + j = jresult; + + // If i < j: + cond = builder.create(loc, arith::CmpIPredicate::ult, i, j); + scf::IfOp ifOp = builder.create(loc, types, cond, /*else=*/true); builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); - Value i1 = - builder.create(loc, forOp.getRegionIterArgs().front(), c1); - SmallVector swapOperands{i1, j}; + SmallVector swapOperands{i, j}; swapOperands.append(args.begin() + xStartIdx, args.end()); - FlatSymbolRefAttr swapFunc = getMangledSortHelperFunc( - builder, func, TypeRange(), kMaySwapFuncNamePrefix, dim, swapOperands, - createMaySwapFunc); - builder.create(loc, swapFunc, TypeRange(), swapOperands); - builder.create(loc, i1); - - // The if-stmt false branch: yield i. + createSwap(builder, loc, swapOperands); + // If the pivot is moved, update p with the new pivot. 
+ Value icond = + builder.create(loc, arith::CmpIPredicate::eq, i, p); + scf::IfOp ifOpI = builder.create(loc, TypeRange{p.getType()}, + icond, /*else=*/true); + builder.setInsertionPointToStart(&ifOpI.getThenRegion().front()); + builder.create(loc, ValueRange{j}); + builder.setInsertionPointToStart(&ifOpI.getElseRegion().front()); + Value jcond = + builder.create(loc, arith::CmpIPredicate::eq, j, p); + scf::IfOp ifOpJ = builder.create(loc, TypeRange{p.getType()}, + jcond, /*else=*/true); + builder.setInsertionPointToStart(&ifOpJ.getThenRegion().front()); + builder.create(loc, ValueRange{i}); + builder.setInsertionPointToStart(&ifOpJ.getElseRegion().front()); + builder.create(loc, ValueRange{p}); + builder.setInsertionPointAfter(ifOpJ); + builder.create(loc, ifOpJ.getResults()); + builder.setInsertionPointAfter(ifOpI); + Value compareEqIJ = + builder.create(loc, iCompareEq, jCompareEq); + scf::IfOp ifOp2 = builder.create( + loc, TypeRange{i.getType(), j.getType()}, compareEqIJ, /*else=*/true); + builder.setInsertionPointToStart(&ifOp2.getThenRegion().front()); + Value i2 = builder.create(loc, i, c1); + Value j2 = builder.create(loc, j, c1); + builder.create(loc, ValueRange{i2, j2}); + builder.setInsertionPointToStart(&ifOp2.getElseRegion().front()); + builder.create(loc, ValueRange{i, j}); + builder.setInsertionPointAfter(ifOp2); + builder.create( + loc, + ValueRange{ifOp2.getResult(0), ifOp2.getResult(1), ifOpI.getResult(0)}); + + // False branch for if i < j: builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); - builder.create(loc, forOp.getRegionIterArgs().front()); + builder.create(loc, ValueRange{i, j, p}); - // After the if-stmt, yield the updated i value to end the for-stmt body. + // Return for the whileOp. builder.setInsertionPointAfter(ifOp); - builder.create(loc, ifOp.getResult(0)); - - // After the for-stmt: i++; swap(data[i], data[him1]); return i. 
- builder.setInsertionPointAfter(forOp); - i1 = builder.create(loc, forOp.getResult(0), c1); - swapOperands[0] = i1; - swapOperands[1] = him1; - builder.create(loc, swapFunc, TypeRange(), swapOperands); - builder.create(loc, i1); + builder.create(loc, ifOp.getResults()); + + // Return for the function. + builder.setInsertionPointAfter(whileOp); + builder.create(loc, whileOp.getResult(2)); } /// Creates a function to perform quick sort on the value in the range of diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp index 9c002f1ae0ec8..d0613c09503c0 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp @@ -356,8 +356,10 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { RankedTensorType cooTp = getUnorderedCOOFromType(dstTp); auto cooBuffer = rewriter.create(loc, cooTp, dstDynSizes).getResult(); - rewriter.create( - loc, srcTensor, [&](OpBuilder &builder, Location loc, ValueRange args) { + ForeachOp foreachOp = rewriter.create( + loc, srcTensor, cooBuffer, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector srcIndices; SmallVector dstIndices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { @@ -366,11 +368,11 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern { } translateIndicesArray(builder, loc, op.getReassociationIndices(), srcIndices, srcSizes, dstSizes, dstIndices); - builder.create(loc, args.back(), cooBuffer, dstIndices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), dstIndices); + builder.create(loc, t); }); - - rewriter.replaceOpWithNewOp(op, dstTp, cooBuffer); + auto t = rewriter.create(loc, foreachOp.getResult(0), true); + rewriter.replaceOpWithNewOp(op, dstTp, t); return success(); } }; @@ -440,13 +442,16 @@ struct ConcatenateRewriter : public 
OpRewritePattern { rewriter.create(loc, cooTp, ValueRange()).getResult(); Value offset = constantIndex(rewriter, loc, 0); + ForeachOp foreachOp; for (Value input : op.getInputs()) { // Builds the indexing map. // Build a for op for each input tensor to append new values into the // output tensor. - rewriter.create( - loc, input, [&](OpBuilder &builder, Location loc, ValueRange args) { + foreachOp = rewriter.create( + loc, input, cooBuffer, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector indices; for (int64_t i = 0; i < rank; i++) { uint64_t dim = @@ -457,8 +462,8 @@ struct ConcatenateRewriter : public OpRewritePattern { idx = builder.create(loc, idx, offset); indices.push_back(idx); } - builder.create(loc, args.back(), cooBuffer, indices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); }); // Accumulates the offset. Note that only static-shaped inputs are allowed // by concatenate op verifier, which saves us from computing the offset @@ -467,7 +472,10 @@ struct ConcatenateRewriter : public OpRewritePattern { assert(!ShapedType::isDynamic(d)); offset = rewriter.create(loc, offset, constantIndex(rewriter, loc, d)); + cooBuffer = foreachOp.getResult(0); } + + cooBuffer = rewriter.create(loc, cooBuffer, true); rewriter.replaceOpWithNewOp(op, rtp, cooBuffer); return success(); } @@ -558,12 +566,13 @@ struct ConvertRewriter : public OpRewritePattern { sizesForTensor(rewriter, sizes, loc, srcTp, src); Value dst = allocDenseTensor(rewriter, loc, dstTp, sizes); - rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { - builder.create(loc, args.back(), dst, - args.drop_back()); - builder.create(loc); - }); + rewriter.create(loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, + ValueRange args, Value v, ValueRange reduc) { + builder.create(loc, v, dst, + args); + builder.create(loc); + }); rewriter.replaceOpWithNewOp(op, 
dstTp, dst); return success(); @@ -597,17 +606,19 @@ struct ConvertRewriter : public OpRewritePattern { srcTp = getUnorderedCOOFromType(srcTp); tmpCoo = rewriter.create(loc, srcTp, dynSrcSizes).getResult(); - rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + auto foreachOp = rewriter.create( + loc, src, tmpCoo, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector indices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { uint64_t dim = toStoredDim(encSrc, i); indices.push_back(args[dim]); } - builder.create(loc, args.back(), tmpCoo, indices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); }); - src = tmpCoo; + src = rewriter.create(loc, foreachOp.getResult(0), true); } // Sort the COO tensor so that its elements are ordered via increasing @@ -646,27 +657,31 @@ struct ConvertRewriter : public OpRewritePattern { getDynamicSizes(dstTp, srcSizes, dynDstSizes); Value dst = rewriter.create(loc, dstTp, dynDstSizes).getResult(); - rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + auto foreachOp = rewriter.create( + loc, src, dst, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { SmallVector indices; for (int64_t i = 0, e = srcTp.getRank(); i < e; i++) { uint64_t dim = toStoredDim(encDst, i); indices.push_back(args[dim]); } - builder.create(loc, args.back(), dst, indices); - builder.create(loc); + auto t = builder.create(loc, v, reduc.front(), indices); + builder.create(loc, t); }); - // Release the temporary COO if it is created. + // Release the temporary COO if it is created. Note that tmpCoo is + // invalidated due to foreach and updated to src. if (tmpCoo) - rewriter.create(loc, tmpCoo); + rewriter.create(loc, src); // Directly replace op with dst results in bufferization error message // "sparse tensor allocation should not escape function". 
// As such, we insert a trivial tensor convert which will be removed by // codegen. rewriter.setInsertionPointAfter(op); - rewriter.replaceOpWithNewOp(op, dstTp, dst); + auto t = rewriter.create(loc, foreachOp.getResult(0), true); + rewriter.replaceOpWithNewOp(op, dstTp, t); return success(); } }; @@ -685,6 +700,8 @@ struct ForeachRewriter : public OpRewritePattern { int64_t rank = rtp.getRank(); auto enc = getSparseTensorEncoding(rtp); + SmallVector reduc = op.getInitArgs(); + // 1. Generates loop for the sparse input. SparseTensorLoopEmitter loopEmitter(ValueRange{input}); loopEmitter.initializeLoopEmit(rewriter, loc); @@ -692,7 +709,9 @@ struct ForeachRewriter : public OpRewritePattern { // TODO: provide utility function for loop sequences that only contains // one for loop? loopEmitter.enterNewLoopSeq(rewriter, loc, 0, static_cast(i)); - loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i); + // Note that reduc will be taken care of by loop emitter and get updated + // in place. + loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, i, reduc); } SmallVector coords; @@ -707,15 +726,7 @@ struct ForeachRewriter : public OpRewritePattern { : rewriter.create(loc, vals, coords); // 2. Inline the block in the foreach operator. - Block::iterator inlinePos = rewriter.getInsertionPoint(); Block *srcBlock = op.getBody(); - // Remove sparse_tensor.yield. - rewriter.eraseOp(srcBlock->getTerminator()); - - for (int64_t i = 0; i < rank; i++) { - loopEmitter.exitCurrentLoop(rewriter, loc); - loopEmitter.exitCurrentLoopSeq(); - } SmallVector args; // Remap coordinates. @@ -725,11 +736,33 @@ struct ForeachRewriter : public OpRewritePattern { } // Remap value. args.push_back(val); + // Remap reduction variables. + args.append(reduc); + + // Remove sparse_tensor.yield. + SmallVector reducValue = srcBlock->getTerminator()->getOperands(); + rewriter.eraseOp(srcBlock->getTerminator()); // Inline body. 
- rewriter.mergeBlockBefore(srcBlock, &*inlinePos, args); - // delete the foreach operator. - rewriter.eraseOp(op); + if (!reducValue.empty()) { + rewriter.mergeBlocks(srcBlock, rewriter.getBlock(), args); + } else { + // This is annoying, since scf.for inserts a implicit yield op when + // there is no reduction variable upon creation, in this case we need to + // merge the block *before* the yield op. + rewriter.mergeBlockBefore(srcBlock, &*rewriter.getInsertionPoint(), args); + } + + for (int64_t i = 0; i < rank; i++) { + // Link the reduction chain. Note that loop emitter update the reducValue + // in place. + loopEmitter.exitCurrentLoop(rewriter, loc, reducValue); + loopEmitter.exitCurrentLoopSeq(); + } + + // Replace the foreach operator with the value returned by the outtermost + // for loop. + rewriter.replaceOp(op, reducValue); return success(); } }; @@ -792,7 +825,8 @@ struct NewRewriter : public OpRewritePattern { .getResult(0); Type eltTp = dstTp.getElementType(); Value value = genAllocaScalar(rewriter, loc, eltTp); - scf::ForOp forOp = rewriter.create(loc, c0, nnz, c1); + scf::ForOp forOp = rewriter.create(loc, c0, nnz, c1, + ArrayRef(cooBuffer)); rewriter.setInsertionPointToStart(forOp.getBody()); SmallString<18> getNextFuncName{"getSparseTensorReaderNext", @@ -807,13 +841,17 @@ struct NewRewriter : public OpRewritePattern { loc, indices, constantIndex(rewriter, loc, i))); } Value v = rewriter.create(loc, value); - rewriter.create(loc, v, cooBuffer, indicesArray); + auto t = rewriter.create(loc, v, forOp.getRegionIterArg(0), + indicesArray); + rewriter.create(loc, ArrayRef(t)); rewriter.setInsertionPointAfter(forOp); + // Link SSA chain. + cooBuffer = forOp.getResult(0); // Release the sparse tensor reader. 
createFuncCall(rewriter, loc, "delSparseTensorReader", {}, {reader}, EmitCInterface::Off); - + cooBuffer = rewriter.create(loc, cooBuffer, true); Value newOp = rewriter.replaceOpWithNewOp(op, dstTp, cooBuffer); // Release the unordered COO tensor buffer. @@ -866,12 +904,14 @@ struct OutRewriter : public OpRewritePattern { ModuleOp module = op->getParentOfType(); // For each element in the source tensor, output the element. rewriter.create( - loc, src, [&](OpBuilder &builder, Location loc, ValueRange args) { + loc, src, llvm::None, + [&](OpBuilder &builder, Location loc, ValueRange args, Value v, + ValueRange reduc) { for (uint64_t i = 0; i < rank; i++) { rewriter.create(loc, args[i], indices, constantIndex(builder, loc, i)); } - rewriter.create(loc, args.back(), value); + rewriter.create(loc, v, value); SmallVector operands{writer, rankValue, indices, value}; FlatSymbolRefAttr fn = getFunc(module, outNextFuncName, {}, operands, EmitCInterface::On); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 82125e34d5dff..533d31fdb5536 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -56,7 +56,7 @@ struct CodeGen { CodeGen(SparsificationOptions o, ValueRange tensors, unsigned numTensors, unsigned numLoops, OpOperand *op, unsigned nest, std::vector &ts) - : options(o), loopEmitter(tensors, /*isLastOutput=*/true, + : options(o), loopEmitter(tensors, /*hasOutput=*/true, /*isSparseOut=*/op != nullptr), sparseOut(op), outerParNest(nest), topSort(ts) { if (op) @@ -410,6 +410,34 @@ static Value getCustomRedId(Operation *op) { // Sparse compiler synthesis methods (statements and expressions). //===----------------------------------------------------------------------===// +/// Generates loop boundary statements (entering/exiting loops). The function +/// passes and updates the reduction value. 
+static Optional genLoopBoundary( + CodeGen &codegen, Merger &merger, + function_ref(MutableArrayRef reduc)> + callback) { + SmallVector reduc; + if (codegen.redVal) + reduc.push_back(codegen.redVal); + if (codegen.expValues) + reduc.push_back(codegen.expCount); + if (codegen.insChain) + reduc.push_back(codegen.insChain); + + auto r = callback(reduc); + + // Callback should do in-place update on reduction value vector. + unsigned i = 0; + if (codegen.redVal) + updateReduc(merger, codegen, reduc[i++]); + if (codegen.expValues) + codegen.expCount = reduc[i++]; + if (codegen.insChain) + codegen.insChain = reduc[i]; + + return r; +} + /// Local bufferization of all dense and sparse data structures. static void genBuffers(Merger &merger, CodeGen &codegen, OpBuilder &builder, linalg::GenericOp op) { @@ -869,23 +897,25 @@ static void genExpansion(Merger &merger, CodeGen &codegen, OpBuilder &builder, /// Returns parallelization strategy. Any implicit loop in the Linalg /// operation that is marked "parallel" is a candidate. Whether it is actually /// converted to a parallel operation depends on the requested strategy. -static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isReduction, - bool isSparse) { +static bool isParallelFor(CodeGen &codegen, bool isOuter, bool isSparse) { // Reject parallelization of sparse output. if (codegen.sparseOut) return false; + // Parallel loops on tensor expansion can cause data races. + if (codegen.expCount) + return false; // Inspect strategy. 
switch (codegen.options.parallelizationStrategy) { case SparseParallelizationStrategy::kNone: return false; case SparseParallelizationStrategy::kDenseOuterLoop: - return isOuter && !isSparse && !isReduction; + return isOuter && !isSparse; case SparseParallelizationStrategy::kAnyStorageOuterLoop: - return isOuter && !isReduction; + return isOuter; case SparseParallelizationStrategy::kDenseAnyLoop: - return !isSparse && !isReduction; + return !isSparse; case SparseParallelizationStrategy::kAnyStorageAnyLoop: - return !isReduction; + return true; } llvm_unreachable("unexpected parallelization strategy"); } @@ -898,33 +928,16 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder, ArrayRef extraDims) { Location loc = op.getLoc(); auto iteratorTypes = op.getIteratorTypesArray(); - bool isReduction = linalg::isReductionIterator(iteratorTypes[idx]); bool isSparse = isCompressedDLT(merger.getDimLevelType(tid, idx)) || isSingletonDLT(merger.getDimLevelType(tid, idx)); - bool isParallel = isParallelFor(codegen, isOuter, isReduction, isSparse); - assert(!isParallel); - - // Emit a sequential for loop. 
- SmallVector operands; - if (codegen.redVal) - operands.push_back(codegen.redVal); - if (codegen.expValues) - operands.push_back(codegen.expCount); - if (codegen.insChain) - operands.push_back(codegen.insChain); - - Operation *loop = codegen.loopEmitter.enterLoopOverTensorAtDim( - builder, loc, tid, dim, operands, isParallel, extraTids, extraDims); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, operands[o++]); - if (codegen.expValues) - codegen.expCount = operands[o++]; - if (codegen.insChain) - codegen.insChain = operands[o++]; - assert(o == operands.size()); - + bool isParallel = isParallelFor(codegen, isOuter, isSparse); + + Operation *loop = + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + return codegen.loopEmitter.enterLoopOverTensorAtDim( + builder, loc, tid, dim, reduc, isParallel, extraTids, extraDims); + }).value(); + assert(loop); return loop; } @@ -934,29 +947,15 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen, OpBuilder &builder, ArrayRef condTids, ArrayRef condDims, ArrayRef extraTids, ArrayRef extraDims) { - SmallVector operands; - - // Construct the while-loop with a parameter for each index. - if (codegen.redVal) - operands.push_back(codegen.redVal); - if (codegen.expValues) - operands.push_back(codegen.expCount); - if (codegen.insChain) - operands.push_back(codegen.insChain); - - Operation *loop = codegen.loopEmitter.enterCoIterationOverTensorsAtDims( - builder, op.getLoc(), condTids, condDims, needsUniv, operands, extraTids, - extraDims); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, operands[o++]); - if (codegen.expValues) - codegen.expCount = operands[o++]; - if (codegen.insChain) - codegen.insChain = operands[o++]; - assert(o == operands.size()); + Operation *loop = + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + // Construct the while-loop with a parameter for each index. 
+ return codegen.loopEmitter.enterCoIterationOverTensorsAtDims( + builder, op.getLoc(), condTids, condDims, needsUniv, reduc, + extraTids, extraDims); + }).value(); + assert(loop); return loop; } @@ -1186,37 +1185,21 @@ static Operation *startLoop(Merger &merger, CodeGen &codegen, } /// Ends a single loop in current sequence. Returns new values for needsUniv. -static bool endLoop(Merger &merger, CodeGen &codegen, OpBuilder &builder, +static bool endLoop(Merger &merger, CodeGen &codegen, RewriterBase &rewriter, linalg::GenericOp op, Operation *loop, unsigned idx, unsigned li, bool needsUniv) { // End a while-loop. if (auto whileOp = dyn_cast(loop)) { - finalizeWhileOp(merger, codegen, builder, op, idx, needsUniv, + finalizeWhileOp(merger, codegen, rewriter, op, idx, needsUniv, merger.lat(li).bits, whileOp); } else { needsUniv = false; } - SmallVector reduc; - if (codegen.redVal) - reduc.push_back(codegen.redVal); - if (codegen.expValues) - reduc.push_back(codegen.expCount); - if (codegen.insChain) - reduc.push_back(codegen.insChain); - - auto loopRet = - codegen.loopEmitter.exitCurrentLoop(builder, op.getLoc(), reduc); - assert(reduc.size() == loopRet.size()); - - unsigned o = 0; - if (codegen.redVal) - updateReduc(merger, codegen, loopRet[o++]); - if (codegen.expValues) - codegen.expCount = loopRet[o++]; - if (codegen.insChain) - codegen.insChain = loopRet[o++]; - assert(o == loopRet.size()); + genLoopBoundary(codegen, merger, [&](MutableArrayRef reduc) { + codegen.loopEmitter.exitCurrentLoop(rewriter, op.getLoc(), reduc); + return llvm::None; + }); return needsUniv; } diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 445e78e295fd1..31d892ffb6e41 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -497,27 +497,29 @@ void DimOp::getCanonicalizationPatterns(RewritePatternSet &results, //===----------------------------------------------------------------------===// 
void EmptyOp::build(OpBuilder &builder, OperationState &result, - ArrayRef staticShape, Type elementType) { + ArrayRef staticShape, Type elementType, + Attribute encoding) { assert(all_of(staticShape, [](int64_t sz) { return !ShapedType::isDynamic(sz); }) && "expected only static sizes"); - build(builder, result, staticShape, elementType, {}); + build(builder, result, staticShape, elementType, ValueRange{}, encoding); } void EmptyOp::build(OpBuilder &builder, OperationState &result, ArrayRef staticShape, Type elementType, - ValueRange dynamicSizes) { - auto tensorType = RankedTensorType::get(staticShape, elementType); + ValueRange dynamicSizes, Attribute encoding) { + auto tensorType = RankedTensorType::get(staticShape, elementType, encoding); build(builder, result, tensorType, dynamicSizes); } void EmptyOp::build(OpBuilder &builder, OperationState &result, - ArrayRef sizes, Type elementType) { + ArrayRef sizes, Type elementType, + Attribute encoding) { SmallVector staticShape; SmallVector dynamicSizes; dispatchIndexOpFoldResults(sizes, dynamicSizes, staticShape, ShapedType::kDynamicSize); - build(builder, result, staticShape, elementType, dynamicSizes); + build(builder, result, staticShape, elementType, dynamicSizes, encoding); } LogicalResult EmptyOp::verify() { diff --git a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp index 9b85af35783e7..9b136cccbe6f1 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp @@ -130,7 +130,7 @@ LogicalResult transform::TransformState::updatePayloadOps( if (failed(result.checkAndReport())) return failure(); - std::swap(association, updated); + it->second = updated; return success(); } @@ -314,11 +314,11 @@ transform::TransformResults::TransformResults(unsigned numSegments) { void transform::TransformResults::set(OpResult value, ArrayRef ops) { - unsigned position = value.getResultNumber(); - 
assert(position < segments.size() && + int64_t position = value.getResultNumber(); + assert(position < static_cast(segments.size()) && "setting results for a non-existent handle"); assert(segments[position].data() == nullptr && "results already set"); - unsigned start = operations.size(); + int64_t start = operations.size(); llvm::append_range(operations, ops); segments[position] = makeArrayRef(operations).drop_front(start); } diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 2be1bea91fbe9..5fe2d465ee51a 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -472,6 +472,16 @@ OpFoldResult transform::MergeHandlesOp::fold(ArrayRef operands) { // SplitHandlesOp //===----------------------------------------------------------------------===// +void transform::SplitHandlesOp::build(OpBuilder &builder, + OperationState &result, Value target, + int64_t numResultHandles) { + result.addOperands(target); + result.addAttribute(SplitHandlesOp::getNumResultHandlesAttrName(result.name), + builder.getI64IntegerAttr(numResultHandles)); + auto pdlOpType = pdl::OperationType::get(builder.getContext()); + result.addTypes(SmallVector(numResultHandles, pdlOpType)); +} + DiagnosedSilenceableFailure transform::SplitHandlesOp::apply(transform::TransformResults &results, transform::TransformState &state) { @@ -812,6 +822,20 @@ LogicalResult transform::WithPDLPatternsOp::verify() { // PrintOp //===----------------------------------------------------------------------===// +void transform::PrintOp::build(OpBuilder &builder, OperationState &result, + StringRef name) { + if (!name.empty()) { + result.addAttribute(PrintOp::getNameAttrName(result.name), + builder.getStrArrayAttr(name)); + } +} + +void transform::PrintOp::build(OpBuilder &builder, OperationState &result, + Value target, StringRef name) { + result.addOperands({target}); + build(builder, result, name); +} + 
DiagnosedSilenceableFailure transform::PrintOp::apply(transform::TransformResults &results, transform::TransformState &state) { diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 5dd98a1bada41..bd96ee7de24f7 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -508,11 +508,11 @@ void vector::ContractionOp::build(OpBuilder &builder, OperationState &result, ArrayRef iteratorTypes) { result.addOperands({lhs, rhs, acc}); result.addTypes(acc.getType()); - result.addAttribute(::mlir::getIndexingMapsAttrName(), + result.addAttribute(getIndexingMapsAttrName(result.name), builder.getAffineMapArrayAttr( AffineMap::inferFromExprList(indexingExprs))); result.addAttribute( - ::mlir::getIteratorTypesAttrName(), + getIteratorTypesAttrName(result.name), builder.getArrayAttr(llvm::to_vector(llvm::map_range( iteratorTypes, [&](IteratorType t) -> mlir::Attribute { return IteratorTypeAttr::get(builder.getContext(), t); @@ -533,9 +533,9 @@ void vector::ContractionOp::build(OpBuilder &builder, OperationState &result, ArrayAttr iteratorTypes, CombiningKind kind) { result.addOperands({lhs, rhs, acc}); result.addTypes(acc.getType()); - result.addAttribute(::mlir::getIndexingMapsAttrName(), indexingMaps); - result.addAttribute(::mlir::getIteratorTypesAttrName(), iteratorTypes); - result.addAttribute(ContractionOp::getKindAttrStrName(), + result.addAttribute(getIndexingMapsAttrName(result.name), indexingMaps); + result.addAttribute(getIteratorTypesAttrName(result.name), iteratorTypes); + result.addAttribute(getKindAttrName(result.name), CombiningKindAttr::get(builder.getContext(), kind)); } @@ -570,7 +570,8 @@ ParseResult ContractionOp::parse(OpAsmParser &parser, OperationState &result) { // represented as an array of strings. // TODO: Remove this conversion once tests are fixed. 
ArrayAttr iteratorTypes = - result.attributes.get("iterator_types").cast(); + result.attributes.get(getIteratorTypesAttrName(result.name)) + .cast(); SmallVector iteratorTypeAttrs; @@ -579,15 +580,15 @@ ParseResult ContractionOp::parse(OpAsmParser &parser, OperationState &result) { if (!maybeIteratorType.has_value()) return parser.emitError(loc) << "unexpected iterator_type (" << s << ")"; - iteratorTypeAttrs.push_back(IteratorTypeAttr::get( - parser.getContext(), maybeIteratorType.value())); + iteratorTypeAttrs.push_back( + IteratorTypeAttr::get(parser.getContext(), maybeIteratorType.value())); } - result.attributes.set("iterator_types", + result.attributes.set(getIteratorTypesAttrName(result.name), parser.getBuilder().getArrayAttr(iteratorTypeAttrs)); - if (!result.attributes.get(ContractionOp::getKindAttrStrName())) { + if (!result.attributes.get(getKindAttrName(result.name))) { result.addAttribute( - ContractionOp::getKindAttrStrName(), + getKindAttrName(result.name), CombiningKindAttr::get(result.getContext(), ContractionOp::getDefaultKind())); } @@ -822,11 +823,9 @@ LogicalResult ContractionOp::verify() { return success(); } -ArrayRef ContractionOp::getTraitAttrNames() { - static constexpr StringRef names[3] = {::mlir::getIndexingMapsAttrName(), - ::mlir::getIteratorTypesAttrName(), - ContractionOp::getKindAttrStrName()}; - return llvm::makeArrayRef(names); +SmallVector ContractionOp::getTraitAttrNames() { + return SmallVector{getIndexingMapsAttrName(), + getIteratorTypesAttrName(), getKindAttrName()}; } static int64_t getResultIndex(AffineMap map, AffineExpr targetExpr) { diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp index ca285465648ff..aa79e54b9b306 100644 --- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp @@ -46,9 +46,8 @@ struct MaskOpRewritePattern : OpRewritePattern { using 
OpRewritePattern::OpRewritePattern; private: - LogicalResult - matchAndRewrite(MaskOp maskOp, - PatternRewriter &rewriter) const override final { + LogicalResult matchAndRewrite(MaskOp maskOp, + PatternRewriter &rewriter) const final { MaskableOpInterface maskableOp = maskOp.getMaskableOp(); SourceOp sourceOp = dyn_cast(maskableOp.getOperation()); if (!sourceOp) diff --git a/mlir/lib/ExecutionEngine/OptUtils.cpp b/mlir/lib/ExecutionEngine/OptUtils.cpp index 403e54f004e2b..893a8a490d44a 100644 --- a/mlir/lib/ExecutionEngine/OptUtils.cpp +++ b/mlir/lib/ExecutionEngine/OptUtils.cpp @@ -68,7 +68,13 @@ mlir::makeOptimizingTransformer(unsigned optLevel, unsigned sizeLevel, CGSCCAnalysisManager cgam; ModuleAnalysisManager mam; - PassBuilder pb(targetMachine); + PipelineTuningOptions tuningOptions; + tuningOptions.LoopUnrolling = true; + tuningOptions.LoopInterleaving = true; + tuningOptions.LoopVectorization = true; + tuningOptions.SLPVectorization = true; + + PassBuilder pb(targetMachine, tuningOptions); pb.registerModuleAnalyses(mam); pb.registerCGSCCAnalyses(cgam); diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index e0f45470bf3bd..00778cd47fdb0 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -986,18 +986,9 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, // constant coefficient corresponding to the indices in `coefficients` map, // and affine expression corresponding to indices in `indexToExprMap` map. - for (unsigned j = 0; j < numDims; ++j) { - if (flatExprs[j] == 0) - continue; - // For dimensional expressions we set the index as , as we want dimensional expressions to appear before - // symbolic ones and products of dimensional and symbolic expressions - // having the dimension with the same position number. - std::pair indexEntry(j, -1); - addEntry(indexEntry, flatExprs[j], getAffineDimExpr(j, context)); - } // Ensure we do not have duplicate keys in `indexToExpr` map. 
- unsigned offset = 0; + unsigned offsetSym = 0; + signed offsetDim = -1; for (unsigned j = numDims; j < numDims + numSymbols; ++j) { if (flatExprs[j] == 0) continue; @@ -1006,7 +997,7 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, // as we want symbolic expressions with the same positional number to // appear after dimensional expressions having the same positional number. std::pair indexEntry( - j - numDims, std::max(numDims, numSymbols) + offset++); + j - numDims, std::max(numDims, numSymbols) + offsetSym++); addEntry(indexEntry, flatExprs[j], getAffineSymbolExpr(j - numDims, context)); } @@ -1038,13 +1029,13 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, // constructing. When rhs is constant, we place 0 in place of keyB. if (lhs.isa()) { lhsPos = lhs.cast().getPosition(); - std::pair indexEntry(lhsPos, -1); + std::pair indexEntry(lhsPos, offsetDim--); addEntry(indexEntry, flatExprs[numDims + numSymbols + it.index()], expr); } else { lhsPos = lhs.cast().getPosition(); std::pair indexEntry( - lhsPos, std::max(numDims, numSymbols) + offset++); + lhsPos, std::max(numDims, numSymbols) + offsetSym++); addEntry(indexEntry, flatExprs[numDims + numSymbols + it.index()], expr); } @@ -1066,12 +1057,23 @@ static AffineExpr getSemiAffineExprFromFlatForm(ArrayRef flatExprs, lhsPos = lhs.cast().getPosition(); rhsPos = rhs.cast().getPosition(); std::pair indexEntry( - lhsPos, std::max(numDims, numSymbols) + offset++); + lhsPos, std::max(numDims, numSymbols) + offsetSym++); addEntry(indexEntry, flatExprs[numDims + numSymbols + it.index()], expr); } addedToMap[it.index()] = true; } + for (unsigned j = 0; j < numDims; ++j) { + if (flatExprs[j] == 0) + continue; + // For dimensional expressions we set the index as , as we want dimensional expressions to appear before + // symbolic ones and products of dimensional and symbolic expressions + // having the dimension with the same position number. 
+ std::pair indexEntry(j, offsetDim--); + addEntry(indexEntry, flatExprs[j], getAffineDimExpr(j, context)); + } + // Constructing the simplified semi-affine sum of product/division/mod // expression from the flattened form in the desired sorted order of indices // of the various individual product/division/mod expressions. diff --git a/mlir/lib/IR/BuiltinAttributes.cpp b/mlir/lib/IR/BuiltinAttributes.cpp index ed22134d1dcc8..8a3c162f59423 100644 --- a/mlir/lib/IR/BuiltinAttributes.cpp +++ b/mlir/lib/IR/BuiltinAttributes.cpp @@ -43,23 +43,6 @@ void BuiltinDialect::registerAttributes() { >(); } -//===----------------------------------------------------------------------===// -// ArrayAttr -//===----------------------------------------------------------------------===// - -void ArrayAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (Attribute attr : getValue()) - walkAttrsFn(attr); -} - -Attribute -ArrayAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), replAttrs); -} - //===----------------------------------------------------------------------===// // DictionaryAttr //===----------------------------------------------------------------------===// @@ -217,25 +200,6 @@ DictionaryAttr DictionaryAttr::getEmptyUnchecked(MLIRContext *context) { return Base::get(context, ArrayRef()); } -void DictionaryAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (const NamedAttribute &attr : getValue()) - walkAttrsFn(attr.getValue()); -} - -Attribute -DictionaryAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - std::vector vec = getValue().vec(); - for (auto &it : llvm::enumerate(replAttrs)) - vec[it.index()].setValue(it.value()); - - // The above only modifies the mapped value, but not the key, and therefore - // not the order of the elements. 
It remains sorted - return getWithSorted(getContext(), vec); -} - //===----------------------------------------------------------------------===// // StridedLayoutAttr //===----------------------------------------------------------------------===// @@ -375,24 +339,6 @@ StringAttr SymbolRefAttr::getLeafReference() const { return nestedRefs.empty() ? getRootReference() : nestedRefs.back().getAttr(); } -void SymbolRefAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getRootReference()); - for (FlatSymbolRefAttr ref : getNestedReferences()) - walkAttrsFn(ref); -} - -Attribute -SymbolRefAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - ArrayRef rawNestedRefs = replAttrs.drop_front(); - ArrayRef nestedRefs( - static_cast(rawNestedRefs.data()), - rawNestedRefs.size()); - return get(replAttrs[0].cast(), nestedRefs); -} - //===----------------------------------------------------------------------===// // IntegerAttr //===----------------------------------------------------------------------===// @@ -1812,22 +1758,6 @@ SparseElementsAttr::verify(function_ref emitError, return success(); } -//===----------------------------------------------------------------------===// -// TypeAttr -//===----------------------------------------------------------------------===// - -void TypeAttr::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getValue()); -} - -Attribute -TypeAttr::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(replTypes[0]); -} - //===----------------------------------------------------------------------===// // Attribute Utilities //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp index fe6d6ac3b2c4d..d65c5e9d28b1e 100644 --- a/mlir/lib/IR/BuiltinTypes.cpp +++ 
b/mlir/lib/IR/BuiltinTypes.cpp @@ -187,20 +187,6 @@ FunctionType::getWithoutArgsAndResults(const BitVector &argIndices, return clone(newArgTypes, newResultTypes); } -void FunctionType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (Type type : llvm::concat(getInputs(), getResults())) - walkTypesFn(type); -} - -Type FunctionType::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - unsigned numInputs = getNumInputs(); - return get(getContext(), replTypes.take_front(numInputs), - replTypes.drop_front(numInputs)); -} - //===----------------------------------------------------------------------===// // OpaqueType //===----------------------------------------------------------------------===// @@ -258,17 +244,6 @@ VectorType VectorType::scaleElementBitwidth(unsigned scale) { return VectorType(); } -void VectorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type VectorType::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getShape(), replTypes.front(), getNumScalableDims()); -} - VectorType VectorType::cloneWith(Optional> shape, Type elementType) const { return VectorType::get(shape.value_or(getShape()), elementType, @@ -343,20 +318,6 @@ RankedTensorType::verify(function_ref emitError, return checkTensorElementType(emitError, elementType); } -void RankedTensorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); - if (Attribute encoding = getEncoding()) - walkAttrsFn(encoding); -} - -Type RankedTensorType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(getShape(), replTypes.front(), - replAttrs.empty() ? 
Attribute() : replAttrs.back()); -} - //===----------------------------------------------------------------------===// // UnrankedTensorType //===----------------------------------------------------------------------===// @@ -367,17 +328,6 @@ UnrankedTensorType::verify(function_ref emitError, return checkTensorElementType(emitError, elementType); } -void UnrankedTensorType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); -} - -Type UnrankedTensorType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes.front()); -} - //===----------------------------------------------------------------------===// // BaseMemRefType //===----------------------------------------------------------------------===// @@ -671,24 +621,6 @@ LogicalResult MemRefType::verify(function_ref emitError, return success(); } -void MemRefType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); - if (!getLayout().isIdentity()) - walkAttrsFn(getLayout()); - walkAttrsFn(getMemorySpace()); -} - -Type MemRefType::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - bool hasLayout = replAttrs.size() > 1; - return get(getShape(), replTypes[0], - hasLayout ? replAttrs[0].dyn_cast() - : MemRefLayoutAttrInterface(), - hasLayout ? 
replAttrs[1] : replAttrs[0]); -} - //===----------------------------------------------------------------------===// // UnrankedMemRefType //===----------------------------------------------------------------------===// @@ -870,18 +802,6 @@ LogicalResult mlir::getStridesAndOffset(MemRefType t, return success(); } -void UnrankedMemRefType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkTypesFn(getElementType()); - walkAttrsFn(getMemorySpace()); -} - -Type UnrankedMemRefType::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - return get(replTypes.front(), replAttrs.front()); -} - //===----------------------------------------------------------------------===// /// TupleType //===----------------------------------------------------------------------===// @@ -905,18 +825,6 @@ void TupleType::getFlattenedTypes(SmallVectorImpl &types) { /// Return the number of element types. size_t TupleType::size() const { return getImpl()->size(); } -void TupleType::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (Type type : getTypes()) - walkTypesFn(type); -} - -Type TupleType::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getContext(), replTypes); -} - //===----------------------------------------------------------------------===// // Type Utilities //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/Location.cpp b/mlir/lib/IR/Location.cpp index 8a8801daa1160..dcbf9dcecfe29 100644 --- a/mlir/lib/IR/Location.cpp +++ b/mlir/lib/IR/Location.cpp @@ -80,20 +80,6 @@ CallSiteLoc CallSiteLoc::get(Location name, ArrayRef frames) { return CallSiteLoc::get(name, caller); } -void CallSiteLoc::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getCallee()); - walkAttrsFn(getCaller()); -} - -Attribute 
-CallSiteLoc::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(replAttrs[0].cast(), - replAttrs[1].cast()); -} - //===----------------------------------------------------------------------===// // FusedLoc //===----------------------------------------------------------------------===// @@ -135,55 +121,3 @@ Location FusedLoc::get(ArrayRef locs, Attribute metadata, return Base::get(context, locs, metadata); } - -void FusedLoc::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - for (Attribute attr : getLocations()) - walkAttrsFn(attr); - walkAttrsFn(getMetadata()); -} - -Attribute -FusedLoc::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - SmallVector newLocs; - newLocs.reserve(replAttrs.size() - 1); - for (Attribute attr : replAttrs.drop_back()) - newLocs.push_back(attr.cast()); - return get(getContext(), newLocs, replAttrs.back()); -} - -//===----------------------------------------------------------------------===// -// NameLoc -//===----------------------------------------------------------------------===// - -void NameLoc::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getName()); - walkAttrsFn(getChildLoc()); -} - -Attribute NameLoc::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(replAttrs[0].cast(), - replAttrs[1].cast()); -} - -//===----------------------------------------------------------------------===// -// OpaqueLoc -//===----------------------------------------------------------------------===// - -void OpaqueLoc::walkImmediateSubElements( - function_ref walkAttrsFn, - function_ref walkTypesFn) const { - walkAttrsFn(getFallbackLocation()); -} - -Attribute -OpaqueLoc::replaceImmediateSubElements(ArrayRef replAttrs, - ArrayRef replTypes) const { - return get(getUnderlyingLocation(), getUnderlyingTypeID(), - replAttrs[0].cast()); -} diff --git 
a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp index 33828a954c633..d46f1b46bf7b8 100644 --- a/mlir/lib/IR/OperationSupport.cpp +++ b/mlir/lib/IR/OperationSupport.cpp @@ -16,6 +16,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/OpDefinition.h" #include "llvm/ADT/BitVector.h" +#include "llvm/Support/SHA1.h" #include using namespace mlir; @@ -757,3 +758,42 @@ bool OperationEquivalence::isEquivalentTo( return false; return true; } + +//===----------------------------------------------------------------------===// +// OperationFingerPrint +//===----------------------------------------------------------------------===// + +template +static void addDataToHash(llvm::SHA1 &hasher, const T &data) { + hasher.update( + ArrayRef(reinterpret_cast(&data), sizeof(T))); +} + +OperationFingerPrint::OperationFingerPrint(Operation *topOp) { + llvm::SHA1 hasher; + + // Hash each of the operations based upon their mutable bits: + topOp->walk([&](Operation *op) { + // - Operation pointer + addDataToHash(hasher, op); + // - Attributes + addDataToHash(hasher, op->getAttrDictionary()); + // - Blocks in Regions + for (Region ®ion : op->getRegions()) { + for (Block &block : region) { + addDataToHash(hasher, &block); + for (BlockArgument arg : block.getArguments()) + addDataToHash(hasher, arg); + } + } + // - Location + addDataToHash(hasher, op->getLoc().getAsOpaquePointer()); + // - Operands + for (Value operand : op->getOperands()) + addDataToHash(hasher, operand); + // - Successors + for (unsigned i = 0, e = op->getNumSuccessors(); i != e; ++i) + addDataToHash(hasher, op->getSuccessor(i)); + }); + hash = hasher.result(); +} diff --git a/mlir/lib/IR/TypeDetail.h b/mlir/lib/IR/TypeDetail.h index 1ae66555715f7..9dc8e6380c795 100644 --- a/mlir/lib/IR/TypeDetail.h +++ b/mlir/lib/IR/TypeDetail.h @@ -47,6 +47,8 @@ struct IntegerTypeStorage : public TypeStorage { IntegerTypeStorage(key.first, key.second); } + KeyTy getAsKey() const { return KeyTy(width, signedness); 
} + unsigned width : 30; IntegerType::SignednessSemantics signedness : 2; }; @@ -59,7 +61,7 @@ struct FunctionTypeStorage : public TypeStorage { inputsAndResults(inputsAndResults) {} /// The hash key used for uniquing. - using KeyTy = std::pair; + using KeyTy = std::tuple; bool operator==(const KeyTy &key) const { if (std::get<0>(key) == getInputs()) return std::get<1>(key) == getResults(); @@ -69,7 +71,7 @@ struct FunctionTypeStorage : public TypeStorage { /// Construction. static FunctionTypeStorage *construct(TypeStorageAllocator &allocator, const KeyTy &key) { - TypeRange inputs = key.first, results = key.second; + auto [inputs, results] = key; // Copy the inputs and results into the bump pointer. SmallVector types; @@ -90,6 +92,8 @@ struct FunctionTypeStorage : public TypeStorage { return ArrayRef(inputsAndResults + numInputs, numResults); } + KeyTy getAsKey() const { return KeyTy(getInputs(), getResults()); } + unsigned numInputs; unsigned numResults; Type const *inputsAndResults; @@ -127,6 +131,8 @@ struct TupleTypeStorage final return {getTrailingObjects(), size()}; } + KeyTy getAsKey() const { return getTypes(); } + /// The number of tuple elements. unsigned numElements; }; diff --git a/mlir/lib/Pass/IRPrinting.cpp b/mlir/lib/Pass/IRPrinting.cpp index c20d9b1e9135f..ee52bf81847c2 100644 --- a/mlir/lib/Pass/IRPrinting.cpp +++ b/mlir/lib/Pass/IRPrinting.cpp @@ -11,66 +11,11 @@ #include "mlir/Pass/PassManager.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/SHA1.h" using namespace mlir; using namespace mlir::detail; namespace { -//===----------------------------------------------------------------------===// -// OperationFingerPrint -//===----------------------------------------------------------------------===// - -/// A unique fingerprint for a specific operation, and all of it's internal -/// operations. 
-class OperationFingerPrint { -public: - OperationFingerPrint(Operation *topOp) { - llvm::SHA1 hasher; - - // Hash each of the operations based upon their mutable bits: - topOp->walk([&](Operation *op) { - // - Operation pointer - addDataToHash(hasher, op); - // - Attributes - addDataToHash(hasher, op->getAttrDictionary()); - // - Blocks in Regions - for (Region ®ion : op->getRegions()) { - for (Block &block : region) { - addDataToHash(hasher, &block); - for (BlockArgument arg : block.getArguments()) - addDataToHash(hasher, arg); - } - } - // - Location - addDataToHash(hasher, op->getLoc().getAsOpaquePointer()); - // - Operands - for (Value operand : op->getOperands()) - addDataToHash(hasher, operand); - // - Successors - for (unsigned i = 0, e = op->getNumSuccessors(); i != e; ++i) - addDataToHash(hasher, op->getSuccessor(i)); - }); - hash = hasher.result(); - } - - bool operator==(const OperationFingerPrint &other) const { - return hash == other.hash; - } - bool operator!=(const OperationFingerPrint &other) const { - return !(*this == other); - } - -private: - template - void addDataToHash(llvm::SHA1 &hasher, const T &data) { - hasher.update( - ArrayRef(reinterpret_cast(&data), sizeof(T))); - } - - std::array hash; -}; - //===----------------------------------------------------------------------===// // IRPrinter //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index 0b229844cf874..edb5a53507724 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -290,12 +290,16 @@ OpPassManager::OpPassManager(StringRef name, Nesting nesting) : impl(new OpPassManagerImpl(name, nesting)) {} OpPassManager::OpPassManager(OperationName name, Nesting nesting) : impl(new OpPassManagerImpl(name, nesting)) {} -OpPassManager::OpPassManager(OpPassManager &&rhs) : impl(std::move(rhs.impl)) {} +OpPassManager::OpPassManager(OpPassManager &&rhs) { *this = std::move(rhs); } 
OpPassManager::OpPassManager(const OpPassManager &rhs) { *this = rhs; } OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) { impl = std::make_unique(*rhs.impl); return *this; } +OpPassManager &OpPassManager::operator=(OpPassManager &&rhs) { + impl = std::move(rhs.impl); + return *this; +} OpPassManager::~OpPassManager() = default; @@ -773,9 +777,11 @@ void PassManager::enableVerifier(bool enabled) { verifyPasses = enabled; } /// Run the passes within this manager on the provided operation. LogicalResult PassManager::run(Operation *op) { MLIRContext *context = getContext(); - assert(op->getName() == getOpName(*context) && - "operation has a different name than the PassManager or is from a " - "different context"); + Optional anchorOp = getOpName(*context); + if (anchorOp && anchorOp != op->getName()) + return emitError(op->getLoc()) + << "can't run '" << getOpAnchorName() << "' pass manager on '" + << op->getName() << "' op"; // Register all dialects for the current pipeline. DialectRegistry dependentDialects; diff --git a/mlir/lib/Pass/PassCrashRecovery.cpp b/mlir/lib/Pass/PassCrashRecovery.cpp index 98ea35cf02c3b..a98a1f1f6e475 100644 --- a/mlir/lib/Pass/PassCrashRecovery.cpp +++ b/mlir/lib/Pass/PassCrashRecovery.cpp @@ -60,7 +60,7 @@ struct RecoveryReproducerContext { static void registerSignalHandler(); /// The textual description of the currently executing pipeline. - std::string pipeline; + std::string pipelineElements; /// The MLIR operation representing the IR before the crash. 
Operation *preCrashOperation; @@ -93,8 +93,8 @@ llvm::ManagedStatic> RecoveryReproducerContext::RecoveryReproducerContext( std::string passPipelineStr, Operation *op, PassManager::ReproducerStreamFactory &streamFactory, bool verifyPasses) - : pipeline(std::move(passPipelineStr)), preCrashOperation(op->clone()), - streamFactory(streamFactory), + : pipelineElements(std::move(passPipelineStr)), + preCrashOperation(op->clone()), streamFactory(streamFactory), disableThreads(!op->getContext()->isMultithreadingEnabled()), verifyPasses(verifyPasses) { enable(); @@ -118,6 +118,9 @@ void RecoveryReproducerContext::generate(std::string &description) { } descOS << "reproducer generated at `" << stream->description() << "`"; + std::string pipeline = (preCrashOperation->getName().getStringRef() + "(" + + pipelineElements + ")") + .str(); AsmState state(preCrashOperation); state.attachResourcePrinter( "mlir_reproducer", [&](Operation *op, AsmResourceBuilder &builder) { @@ -470,9 +473,12 @@ void PassReproducerOptions::attachResourceParser(ParserConfig &config) { } LogicalResult PassReproducerOptions::apply(PassManager &pm) const { - if (pipeline.has_value()) - if (failed(parsePassPipeline(*pipeline, pm))) + if (pipeline.has_value()) { + FailureOr reproPm = parsePassPipeline(*pipeline); + if (failed(reproPm)) return failure(); + static_cast(pm) = std::move(*reproPm); + } if (disableThreading.has_value()) pm.getContext()->disableMultithreading(*disableThreading); diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index 423c97c7a6466..0ddb2e99ecfce 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -12,6 +12,7 @@ #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" @@ -532,6 +533,13 @@ LogicalResult 
TextualPipeline::initialize(StringRef text, LogicalResult TextualPipeline::addToPipeline( OpPassManager &pm, function_ref errorHandler) const { + // Temporarily disable implicit nesting while we append to the pipeline. We + // want the created pipeline to exactly match the parsed text pipeline, so + // it's preferrable to just error out if implicit nesting would be required. + OpPassManager::Nesting nesting = pm.getNesting(); + pm.setNesting(OpPassManager::Nesting::Explicit); + auto restore = llvm::make_scope_exit([&]() { pm.setNesting(nesting); }); + return addToPipeline(pipeline, pm, errorHandler); } @@ -730,10 +738,6 @@ struct PassArgData { /// This field is set when instance specific pass options have been provided /// on the command line. StringRef options; - - /// This field is used when the parsed option corresponds to an explicit - /// pipeline. - TextualPipeline pipeline; }; } // namespace @@ -775,9 +779,8 @@ struct PassNameParser : public llvm::cl::parser { PassArgData &value); /// If true, this parser only parses entries that correspond to a concrete - /// pass registry entry, and does not add a `pass-pipeline` argument, does not - /// include the options for pass entries, and does not include pass pipelines - /// entries. + /// pass registry entry, and does not include pipeline entries or the options + /// for pass entries. bool passNamesOnly = false; }; } // namespace @@ -785,12 +788,6 @@ struct PassNameParser : public llvm::cl::parser { void PassNameParser::initialize() { llvm::cl::parser::initialize(); - /// Add an entry for the textual pass pipeline option. - if (!passNamesOnly) { - addLiteralOption(passPipelineArg, PassArgData(), - "A textual description of a pass pipeline to run"); - } - /// Add the pass entries. 
for (const auto &kv : *passRegistry) { addLiteralOption(kv.second.getPassArgument(), &kv.second, @@ -823,11 +820,6 @@ void PassNameParser::printOptionInfo(const llvm::cl::Option &opt, llvm::outs() << " " << opt.HelpStr << '\n'; } - // Print the top-level pipeline argument. - printOptionHelp(passPipelineArg, - "A textual description of a pass pipeline to run", - /*indent=*/4, globalWidth, /*isTopLevel=*/!opt.hasArgStr()); - // Functor used to print the ordered entries of a registration map. auto printOrderedEntries = [&](StringRef header, auto &map) { llvm::SmallVector orderedEntries; @@ -865,11 +857,6 @@ size_t PassNameParser::getOptionWidth(const llvm::cl::Option &opt) const { bool PassNameParser::parse(llvm::cl::Option &opt, StringRef argName, StringRef arg, PassArgData &value) { - // Handle the pipeline option explicitly. - if (argName == passPipelineArg) - return failed(value.pipeline.initialize(arg, llvm::errs())); - - // Otherwise, default to the base for handling. if (llvm::cl::parser::parse(opt, argName, arg, value)) return true; value.options = arg; @@ -907,12 +894,16 @@ struct PassPipelineCLParserImpl { /// Construct a pass pipeline parser with the given command line description. PassPipelineCLParser::PassPipelineCLParser(StringRef arg, StringRef description) : impl(std::make_unique( - arg, description, /*passNamesOnly=*/false)) {} + arg, description, /*passNamesOnly=*/false)), + passPipeline( + StringRef(passPipelineArg), + llvm::cl::desc("Textual description of the pass pipeline to run")) {} PassPipelineCLParser::~PassPipelineCLParser() = default; /// Returns true if this parser contains any valid options to add. 
bool PassPipelineCLParser::hasAnyOccurrences() const { - return impl->passList.getNumOccurrences() != 0; + return passPipeline.getNumOccurrences() != 0 || + impl->passList.getNumOccurrences() != 0; } /// Returns true if the given pass registry entry was registered at the @@ -925,19 +916,24 @@ bool PassPipelineCLParser::contains(const PassRegistryEntry *entry) const { LogicalResult PassPipelineCLParser::addToPipeline( OpPassManager &pm, function_ref errorHandler) const { + if (passPipeline.getNumOccurrences()) { + if (impl->passList.getNumOccurrences()) + return errorHandler( + "'-" + passPipelineArg + + "' option can't be used with individual pass options"); + std::string errMsg; + llvm::raw_string_ostream os(errMsg); + FailureOr parsed = parsePassPipeline(passPipeline, os); + if (failed(parsed)) + return errorHandler(errMsg); + pm = std::move(*parsed); + return success(); + } + for (auto &passIt : impl->passList) { - if (passIt.registryEntry) { - if (failed(passIt.registryEntry->addToPipeline(pm, passIt.options, - errorHandler))) - return failure(); - } else { - OpPassManager::Nesting nesting = pm.getNesting(); - pm.setNesting(OpPassManager::Nesting::Explicit); - LogicalResult status = passIt.pipeline.addToPipeline(pm, errorHandler); - pm.setNesting(nesting); - if (failed(status)) - return failure(); - } + if (failed(passIt.registryEntry->addToPipeline(pm, passIt.options, + errorHandler))) + return failure(); } return success(); } diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 865add93f9659..18cff0c466771 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -412,35 +412,35 @@ class Importer { /// Returns the builtin type equivalent to be used in attributes for the given /// LLVM IR dialect type. Type getStdTypeForAttr(Type type); - /// Return `value` as an attribute to attach to a GlobalOp. 
+ /// Returns `value` as an attribute to attach to a GlobalOp. Attribute getConstantAsAttr(llvm::Constant *value); - /// Return `constant` as an MLIR Value. This could either be a ConstantOp, or - /// an expanded sequence of ops in the current function's entry block (for + /// Converts the LLVM constant to an MLIR value produced by a ConstantOp, + /// AddressOfOp, NullOp, or to an expanded sequence of operations (for /// ConstantExprs or ConstantGEPs). - Value processConstant(llvm::Constant *constant); + Value convertConstantInPlace(llvm::Constant *constant); + /// Converts the LLVM constant to an MLIR value using the + /// `convertConstantInPlace` method and inserts the constant at the start of + /// the function entry block. + Value convertConstant(llvm::Constant *constant); + + /// Set the constant insertion point to the start of the given block. + void setConstantInsertionPointToStart(Block *block) { + constantInsertionBlock = block; + constantInsertionOp = nullptr; + } - /// Builder pointing at where the next Instruction should be generated. + /// Builder pointing at where the next instruction should be generated. OpBuilder builder; + /// Block to insert the next constant into. + Block *constantInsertionBlock = nullptr; + /// Operation to insert the next constant after. + Operation *constantInsertionOp = nullptr; + /// Operation to insert the next global after. + Operation *globalInsertionOp = nullptr; /// The current context. MLIRContext *context; /// The current module being created. ModuleOp module; - /// The entry block of the current function being processed. - Block *currentEntryBlock = nullptr; - - /// Globals are inserted before the first function, if any. - Block::iterator getGlobalInsertPt() { - Block::iterator it = module.getBody()->begin(); - Block::iterator endIt = module.getBody()->end(); - while (it != endIt && !isa(it)) - ++it; - return it; - } - - /// Functions are always inserted before the module terminator. 
- Block::iterator getFuncInsertPt() { - return std::prev(module.getBody()->end()); - } /// Function-local mapping between original and imported block. DenseMap blockMapping; @@ -642,7 +642,14 @@ GlobalOp Importer::processGlobal(llvm::GlobalVariable *gv) { if (it != globals.end()) return it->second; - OpBuilder b(module.getBody(), getGlobalInsertPt()); + // Insert the global after the last one or at the start of the module. + OpBuilder::InsertionGuard guard(builder); + if (!globalInsertionOp) { + builder.setInsertionPointToStart(module.getBody()); + } else { + builder.setInsertionPointAfter(globalInsertionOp); + } + Attribute valueAttr; if (gv->hasInitializer()) valueAttr = getConstantAsAttr(gv->getInitializer()); @@ -655,20 +662,18 @@ GlobalOp Importer::processGlobal(llvm::GlobalVariable *gv) { alignment = align.value(); } - GlobalOp op = b.create( + GlobalOp op = builder.create( UnknownLoc::get(context), type, gv->isConstant(), convertLinkageFromLLVM(gv->getLinkage()), gv->getName(), valueAttr, alignment, /*addr_space=*/gv->getAddressSpace(), /*dso_local=*/gv->isDSOLocal(), /*thread_local=*/gv->isThreadLocal()); + globalInsertionOp = op; if (gv->hasInitializer() && !valueAttr) { - Region &r = op.getInitializerRegion(); - currentEntryBlock = b.createBlock(&r); - b.setInsertionPoint(currentEntryBlock, currentEntryBlock->begin()); - Value v = processConstant(gv->getInitializer()); - if (!v) - return nullptr; - b.create(op.getLoc(), ArrayRef({v})); + Block *block = builder.createBlock(&op.getInitializerRegion()); + setConstantInsertionPointToStart(block); + Value value = convertConstant(gv->getInitializer()); + builder.create(op.getLoc(), ArrayRef({value})); } if (gv->hasAtLeastLocalUnnamedAddr()) op.setUnnamedAddr(convertUnnamedAddrFromLLVM(gv->getUnnamedAddr())); @@ -678,29 +683,25 @@ GlobalOp Importer::processGlobal(llvm::GlobalVariable *gv) { return globals[gv] = op; } -Value Importer::processConstant(llvm::Constant *constant) { - OpBuilder 
bEntry(currentEntryBlock, currentEntryBlock->begin()); +Value Importer::convertConstantInPlace(llvm::Constant *constant) { if (Attribute attr = getConstantAsAttr(constant)) { // These constants can be represented as attributes. - OpBuilder b(currentEntryBlock, currentEntryBlock->begin()); Type type = convertType(constant->getType()); if (auto symbolRef = attr.dyn_cast()) - return bEntry.create(UnknownLoc::get(context), type, - symbolRef.getValue()); - return bEntry.create(UnknownLoc::get(context), type, attr); + return builder.create(UnknownLoc::get(context), type, + symbolRef.getValue()); + return builder.create(UnknownLoc::get(context), type, attr); } if (auto *cn = dyn_cast(constant)) { Type type = convertType(cn->getType()); - return bEntry.create(UnknownLoc::get(context), type); + return builder.create(UnknownLoc::get(context), type); } if (auto *gv = dyn_cast(constant)) - return bEntry.create(UnknownLoc::get(context), - processGlobal(gv)); + return builder.create(UnknownLoc::get(context), + processGlobal(gv)); if (auto *ce = dyn_cast(constant)) { llvm::Instruction *i = ce->getAsInstruction(); - OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPoint(currentEntryBlock, currentEntryBlock->begin()); if (failed(processInstruction(i))) return nullptr; assert(valueMapping.count(i)); @@ -720,7 +721,7 @@ Value Importer::processConstant(llvm::Constant *constant) { } if (auto *ue = dyn_cast(constant)) { Type type = convertType(ue->getType()); - return bEntry.create(UnknownLoc::get(context), type); + return builder.create(UnknownLoc::get(context), type); } if (isa(constant) || @@ -747,41 +748,62 @@ Value Importer::processConstant(llvm::Constant *constant) { bool useInsertValue = rootType.isa(); assert((useInsertValue || LLVM::isCompatibleVectorType(rootType)) && "unrecognized aggregate type"); - Value root = bEntry.create(UnknownLoc::get(context), rootType); + Value root = builder.create(UnknownLoc::get(context), rootType); for (unsigned i = 0; i < 
numElements; ++i) { llvm::Constant *element = getElement(i); - Value elementValue = processConstant(element); + Value elementValue = convertConstantInPlace(element); if (!elementValue) return nullptr; if (useInsertValue) { - root = bEntry.create(UnknownLoc::get(context), root, - elementValue, i); + root = builder.create(UnknownLoc::get(context), root, + elementValue, i); } else { - Attribute indexAttr = bEntry.getI32IntegerAttr(static_cast(i)); - Value indexValue = bEntry.create( - UnknownLoc::get(context), bEntry.getI32Type(), indexAttr); + Attribute indexAttr = + builder.getI32IntegerAttr(static_cast(i)); + Value indexValue = builder.create( + UnknownLoc::get(context), builder.getI32Type(), indexAttr); if (!indexValue) return nullptr; - root = bEntry.create( + root = builder.create( UnknownLoc::get(context), rootType, root, elementValue, indexValue); } } return root; } - emitError(UnknownLoc::get(context)) - << "unhandled constant: " << diag(*constant); return nullptr; } +Value Importer::convertConstant(llvm::Constant *constant) { + assert(constantInsertionBlock && + "expected the constant insertion block to be non-null"); + + // Insert the constant after the last one or at the start of the entry block. + OpBuilder::InsertionGuard guard(builder); + if (!constantInsertionOp) { + builder.setInsertionPointToStart(constantInsertionBlock); + } else { + builder.setInsertionPointAfter(constantInsertionOp); + } + + // Convert the constant in-place and update the insertion point if successful. + if (Value result = convertConstantInPlace(constant)) { + constantInsertionOp = result.getDefiningOp(); + return result; + } + + llvm::errs() << diag(*constant) << "\n"; + llvm_unreachable("unhandled constant"); +} + Value Importer::processValue(llvm::Value *value) { auto it = valueMapping.find(value); if (it != valueMapping.end()) return it->second; - // Process constants such as immediate arguments that have no mapping. 
+ // Convert constants such as immediate arguments that have no mapping. if (auto *c = dyn_cast(value)) - return processConstant(c); + return convertConstant(c); llvm::errs() << diag(*value) << "\n"; llvm_unreachable("unhandled value"); @@ -927,7 +949,7 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) { SmallVector ops; for (unsigned i = 0, ie = lpi->getNumClauses(); i < ie; i++) - ops.push_back(processConstant(lpi->getClause(i))); + ops.push_back(convertConstant(lpi->getClause(i))); Type ty = convertType(lpi->getType()); Value res = builder.create(loc, ty, lpi->isCleanup(), ops); @@ -1034,7 +1056,10 @@ LogicalResult Importer::processFunction(llvm::Function *func) { bool dsoLocal = func->hasLocalLinkage(); CConv cconv = convertCConvFromLLVM(func->getCallingConv()); - builder.setInsertionPoint(module.getBody(), getFuncInsertPt()); + // Insert the function at the end of the module. + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPoint(module.getBody(), module.getBody()->end()); + LLVMFuncOp funcOp = builder.create( UnknownLoc::get(context), func->getName(), functionType, convertLinkageFromLLVM(func->getLinkage()), dsoLocal, cconv); @@ -1090,7 +1115,6 @@ LogicalResult Importer::processFunction(llvm::Function *func) { builder.createBlock(&funcOp.getBody(), funcOp.getBody().end()); mapBlock(&bb, block); } - currentEntryBlock = &funcOp.getFunctionBody().getBlocks().front(); // Add function arguments to the entry block. for (const auto &it : llvm::enumerate(func->args())) { @@ -1103,6 +1127,7 @@ LogicalResult Importer::processFunction(llvm::Function *func) { // operands defined in a dominating block have a valid mapping to an MLIR // value once a block is translated. 
SetVector blocks = getTopologicallySortedBlocks(func); + setConstantInsertionPointToStart(lookupBlock(blocks.front())); for (llvm::BasicBlock *bb : blocks) { if (failed(processBasicBlock(bb, lookupBlock(bb)))) return failure(); diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index 6de8febdd56a8..3dbb3f719e5b3 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -216,7 +216,7 @@ llvm::DINode *DebugTranslation::translate(DINodeAttr attr) { .Case( + DISubrangeAttr, DISubroutineTypeAttr>( [&](auto attr) { return translateImpl(attr); }); attrToNode.insert({attr, node}); return node; diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp index 1f89a55ee363e..abc2fadbbc9ac 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp @@ -258,6 +258,70 @@ static SmallVector extractPosition(ArrayRef indices) { return position; } +/// Get the declaration of an overloaded llvm intrinsic. First we get the +/// overloaded argument types and/or result type from the CallIntrinsicOp, and +/// then use those to get the correct declaration of the overloaded intrinsic. +static FailureOr +getOverloadedDeclaration(CallIntrinsicOp &op, llvm::Intrinsic::ID id, + llvm::Module *module, + LLVM::ModuleTranslation &moduleTranslation) { + SmallVector allArgTys; + for (Type type : op->getOperandTypes()) + allArgTys.push_back(moduleTranslation.convertType(type)); + + llvm::Type *resTy; + if (op.getNumResults() == 0) + resTy = llvm::Type::getVoidTy(module->getContext()); + else + resTy = moduleTranslation.convertType(op.getResult(0).getType()); + + // ATM we do not support variadic intrinsics. 
+ llvm::FunctionType *ft = llvm::FunctionType::get(resTy, allArgTys, false); + + SmallVector table; + getIntrinsicInfoTableEntries(id, table); + ArrayRef tableRef = table; + + SmallVector overloadedArgTys; + if (llvm::Intrinsic::matchIntrinsicSignature(ft, tableRef, + overloadedArgTys) != + llvm::Intrinsic::MatchIntrinsicTypesResult::MatchIntrinsicTypes_Match) { + return op.emitOpError("intrinsic type is not a match"); + } + + ArrayRef overloadedArgTysRef = overloadedArgTys; + return llvm::Intrinsic::getDeclaration(module, id, overloadedArgTysRef); +} + +/// Builder for LLVM_CallIntrinsicOp +static LogicalResult +convertCallLLVMIntrinsicOp(CallIntrinsicOp &op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::Module *module = builder.GetInsertBlock()->getModule(); + llvm::Intrinsic::ID id = + llvm::Function::lookupIntrinsicID(op.getIntrinAttr()); + if (!id) + return op.emitOpError() + << "couldn't find intrinsic: " << op.getIntrinAttr(); + + llvm::Function *fn = nullptr; + if (llvm::Intrinsic::isOverloaded(id)) { + auto fnOrFailure = + getOverloadedDeclaration(op, id, module, moduleTranslation); + if (failed(fnOrFailure)) + return failure(); + fn = fnOrFailure.value(); + } else { + fn = llvm::Intrinsic::getDeclaration(module, id, {}); + } + + auto *inst = + builder.CreateCall(fn, moduleTranslation.lookupValues(op.getOperands())); + if (op.getNumResults() == 1) + moduleTranslation.mapValue(op->getResults().front()) = inst; + return success(); +} + static LogicalResult convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -272,8 +336,8 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, // Emit function calls. If the "callee" attribute is present, this is a // direct function call and we also need to look up the remapped function // itself. Otherwise, this is an indirect call and the callee is the first - // operand, look it up as a normal value. 
Return the llvm::Value representing - // the function result, which may be of llvm::VoidTy type. + // operand, look it up as a normal value. Return the llvm::Value + // representing the function result, which may be of llvm::VoidTy type. auto convertCall = [&](Operation &op) -> llvm::Value * { auto operands = moduleTranslation.lookupValues(op.getOperands()); ArrayRef operandsRef(operands); @@ -404,8 +468,8 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder, return success(); } - // Emit branches. We need to look up the remapped blocks and ignore the block - // arguments that were transformed into PHI nodes. + // Emit branches. We need to look up the remapped blocks and ignore the + // block arguments that were transformed into PHI nodes. if (auto brOp = dyn_cast(opInst)) { llvm::BranchInst *branch = builder.CreateBr(moduleTranslation.lookupBlock(brOp.getSuccessor())); diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index d1cdc77e4b968..7c0e3efe8e0b6 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -826,107 +826,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { debugTranslation->translate(func, *llvmFunc); // Add function arguments to the value remapping table. - // If there was noalias info then we decorate each argument accordingly. - unsigned int argIdx = 0; - for (auto kvp : llvm::zip(func.getArguments(), llvmFunc->args())) { - llvm::Argument &llvmArg = std::get<1>(kvp); - BlockArgument mlirArg = std::get<0>(kvp); - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getNoAliasAttrName())) { - // NB: Attribute already verified to be boolean, so check if we can indeed - // attach the attribute to this argument, based on its type. 
- auto argTy = mlirArg.getType(); - if (!argTy.isa()) - return func.emitError( - "llvm.noalias attribute attached to LLVM non-pointer argument"); - llvmArg.addAttr(llvm::Attribute::AttrKind::NoAlias); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getAlignAttrName())) { - // NB: Attribute already verified to be int, so check if we can indeed - // attach the attribute to this argument, based on its type. - auto argTy = mlirArg.getType(); - if (!argTy.isa()) - return func.emitError( - "llvm.align attribute attached to LLVM non-pointer argument"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addAlignmentAttr(llvm::Align(attr.getInt()))); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getStructRetAttrName())) { - auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy) - return func.emitError( - "llvm.sret attribute attached to LLVM non-pointer argument"); - if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) - return func.emitError("llvm.sret attribute attached to LLVM pointer " - "argument of a different type"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addStructRetAttr(convertType(attr.getValue()))); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getByValAttrName())) { - auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy) - return func.emitError( - "llvm.byval attribute attached to LLVM non-pointer argument"); - if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) - return func.emitError("llvm.byval attribute attached to LLVM pointer " - "argument of a different type"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addByValAttr(convertType(attr.getValue()))); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getByRefAttrName())) { - auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy) - return func.emitError( - "llvm.byref attribute attached to LLVM non-pointer argument"); - if 
(!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) - return func.emitError("llvm.byref attribute attached to LLVM pointer " - "argument of a different type"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addByRefAttr(convertType(attr.getValue()))); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getInAllocaAttrName())) { - auto argTy = mlirArg.getType().dyn_cast(); - if (!argTy) - return func.emitError( - "llvm.inalloca attribute attached to LLVM non-pointer argument"); - if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) - return func.emitError( - "llvm.inalloca attribute attached to LLVM pointer " - "argument of a different type"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addInAllocaAttr(convertType(attr.getValue()))); - } - - if (auto attr = func.getArgAttrOfType(argIdx, "llvm.nest")) { - auto argTy = mlirArg.getType(); - if (!argTy.isa()) - return func.emitError( - "llvm.nest attribute attached to LLVM non-pointer argument"); - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addAttribute(llvm::Attribute::Nest)); - } - - if (auto attr = func.getArgAttrOfType( - argIdx, LLVMDialect::getNoUndefAttrName())) { - // llvm.noundef can be added to any argument type. - llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) - .addAttribute(llvm::Attribute::NoUndef)); - } - + for (auto [mlirArg, llvmArg] : + llvm::zip(func.getArguments(), llvmFunc->args())) mapValue(mlirArg, &llvmArg); - argIdx++; - } // Check the personality and set it. if (func.getPersonality()) { @@ -986,6 +888,124 @@ LogicalResult ModuleTranslation::convertFunctionSignatures() { if (function->getAttrOfType(LLVMDialect::getReadnoneAttrName())) llvmFunc->setDoesNotAccessMemory(); + // Convert argument attributes. 
+ unsigned int argIdx = 0; + for (auto [mlirArgTy, llvmArg] : + llvm::zip(function.getArgumentTypes(), llvmFunc->args())) { + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getNoAliasAttrName())) { + // NB: Attribute already verified to be boolean, so check if we can + // indeed attach the attribute to this argument, based on its type. + if (!mlirArgTy.isa()) + return function.emitError( + "llvm.noalias attribute attached to LLVM non-pointer argument"); + llvmArg.addAttr(llvm::Attribute::AttrKind::NoAlias); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getAlignAttrName())) { + // NB: Attribute already verified to be int, so check if we can indeed + // attach the attribute to this argument, based on its type. + if (!mlirArgTy.isa()) + return function.emitError( + "llvm.align attribute attached to LLVM non-pointer argument"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAlignmentAttr(llvm::Align(attr.getInt()))); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getStructRetAttrName())) { + auto argTy = mlirArgTy.dyn_cast(); + if (!argTy) + return function.emitError( + "llvm.sret attribute attached to LLVM non-pointer argument"); + if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) + return function.emitError( + "llvm.sret attribute attached to LLVM pointer " + "argument of a different type"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addStructRetAttr(convertType(attr.getValue()))); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getByValAttrName())) { + auto argTy = mlirArgTy.dyn_cast(); + if (!argTy) + return function.emitError( + "llvm.byval attribute attached to LLVM non-pointer argument"); + if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) + return function.emitError( + "llvm.byval attribute attached to LLVM pointer " + "argument of a different type"); + 
llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addByValAttr(convertType(attr.getValue()))); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getByRefAttrName())) { + auto argTy = mlirArgTy.dyn_cast(); + if (!argTy) + return function.emitError( + "llvm.byref attribute attached to LLVM non-pointer argument"); + if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) + return function.emitError( + "llvm.byref attribute attached to LLVM pointer " + "argument of a different type"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addByRefAttr(convertType(attr.getValue()))); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getInAllocaAttrName())) { + auto argTy = mlirArgTy.dyn_cast(); + if (!argTy) + return function.emitError( + "llvm.inalloca attribute attached to LLVM non-pointer argument"); + if (!argTy.isOpaque() && argTy.getElementType() != attr.getValue()) + return function.emitError( + "llvm.inalloca attribute attached to LLVM pointer " + "argument of a different type"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addInAllocaAttr(convertType(attr.getValue()))); + } + + if (auto attr = + function.getArgAttrOfType(argIdx, "llvm.nest")) { + if (!mlirArgTy.isa()) + return function.emitError( + "llvm.nest attribute attached to LLVM non-pointer argument"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAttribute(llvm::Attribute::Nest)); + } + + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getNoUndefAttrName())) { + // llvm.noundef can be added to any argument type. + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAttribute(llvm::Attribute::NoUndef)); + } + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getSExtAttrName())) { + // llvm.signext can be added to any integer argument type. 
+ if (!mlirArgTy.isa()) + return function.emitError( + "llvm.signext attribute attached to LLVM non-integer argument"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAttribute(llvm::Attribute::SExt)); + } + if (auto attr = function.getArgAttrOfType( + argIdx, LLVMDialect::getZExtAttrName())) { + // llvm.zeroext can be added to any integer argument type. + if (!mlirArgTy.isa()) + return function.emitError( + "llvm.zeroext attribute attached to LLVM non-integer argument"); + llvmArg.addAttrs(llvm::AttrBuilder(llvmArg.getContext()) + .addAttribute(llvm::Attribute::ZExt)); + } + + ++argIdx; + } + // Forward the pass-through attributes to LLVM. if (failed(forwardPassthroughAttributes( function.getLoc(), function.getPassthrough(), llvmFunc))) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 505127c459656..61bc4ffbe6f28 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -3056,6 +3056,29 @@ auto TypeConverter::convertBlockSignature(Block *block) // FunctionOpInterfaceSignatureConversion //===----------------------------------------------------------------------===// +static LogicalResult convertFuncOpTypes(FunctionOpInterface funcOp, + TypeConverter &typeConverter, + ConversionPatternRewriter &rewriter) { + FunctionType type = funcOp.getFunctionType().cast(); + + // Convert the original function types. + TypeConverter::SignatureConversion result(type.getNumInputs()); + SmallVector newResults; + if (failed(typeConverter.convertSignatureArgs(type.getInputs(), result)) || + failed(typeConverter.convertTypes(type.getResults(), newResults)) || + failed(rewriter.convertRegionTypes(&funcOp.getFunctionBody(), + typeConverter, &result))) + return failure(); + + // Update the function signature in-place. 
+ auto newType = FunctionType::get(rewriter.getContext(), + result.getConvertedTypes(), newResults); + + rewriter.updateRootInPlace(funcOp, [&] { funcOp.setType(newType); }); + + return success(); +} + /// Create a default conversion pattern that rewrites the type signature of a /// FunctionOpInterface op. This only supports ops which use FunctionType to /// represent their type. @@ -3067,27 +3090,21 @@ struct FunctionOpInterfaceSignatureConversion : public ConversionPattern { : ConversionPattern(converter, functionLikeOpName, /*benefit=*/1, ctx) {} LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, + matchAndRewrite(Operation *op, ArrayRef /*operands*/, ConversionPatternRewriter &rewriter) const override { FunctionOpInterface funcOp = cast(op); - FunctionType type = funcOp.getFunctionType().cast(); - - // Convert the original function types. - TypeConverter::SignatureConversion result(type.getNumInputs()); - SmallVector newResults; - if (failed(typeConverter->convertSignatureArgs(type.getInputs(), result)) || - failed(typeConverter->convertTypes(type.getResults(), newResults)) || - failed(rewriter.convertRegionTypes(&funcOp.getFunctionBody(), - *typeConverter, &result))) - return failure(); - - // Update the function signature in-place. 
- auto newType = FunctionType::get(rewriter.getContext(), - result.getConvertedTypes(), newResults); + return convertFuncOpTypes(funcOp, *typeConverter, rewriter); + } +}; - rewriter.updateRootInPlace(op, [&] { funcOp.setType(newType); }); +struct AnyFunctionOpInterfaceSignatureConversion + : public OpInterfaceConversionPattern { + using OpInterfaceConversionPattern::OpInterfaceConversionPattern; - return success(); + LogicalResult + matchAndRewrite(FunctionOpInterface funcOp, ArrayRef /*operands*/, + ConversionPatternRewriter &rewriter) const override { + return convertFuncOpTypes(funcOp, *typeConverter, rewriter); } }; } // namespace @@ -3099,6 +3116,12 @@ void mlir::populateFunctionOpInterfaceTypeConversionPattern( functionLikeOpName, patterns.getContext(), converter); } +void mlir::populateAnyFunctionOpInterfaceTypeConversionPattern( + RewritePatternSet &patterns, TypeConverter &converter) { + patterns.add( + converter, patterns.getContext()); +} + //===----------------------------------------------------------------------===// // ConversionTarget //===----------------------------------------------------------------------===// diff --git a/mlir/test/Analysis/test-alias-analysis-modref.mlir b/mlir/test/Analysis/test-alias-analysis-modref.mlir index 7c5328cb7f750..eee8ae9049cfa 100644 --- a/mlir/test/Analysis/test-alias-analysis-modref.mlir +++ b/mlir/test/Analysis/test-alias-analysis-modref.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(test-alias-analysis-modref)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-alias-analysis-modref))' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s // CHECK-LABEL: Testing : "no_side_effects" // CHECK: alloc -> func.region0#0: NoModRef diff --git a/mlir/test/Analysis/test-alias-analysis.mlir b/mlir/test/Analysis/test-alias-analysis.mlir index 0e19282dbc9e9..8cbee61c78b45 100644 --- 
a/mlir/test/Analysis/test-alias-analysis.mlir +++ b/mlir/test/Analysis/test-alias-analysis.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(test-alias-analysis)' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-alias-analysis))' -split-input-file -allow-unregistered-dialect 2>&1 | FileCheck %s // CHECK-LABEL: Testing : "simple" // CHECK-DAG: func.region0#0 <-> func.region0#1: MayAlias diff --git a/mlir/test/Analysis/test-dominance.mlir b/mlir/test/Analysis/test-dominance.mlir index b7734151a516f..3c53193db7f72 100644 --- a/mlir/test/Analysis/test-dominance.mlir +++ b/mlir/test/Analysis/test-dominance.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-dominance)" -split-input-file 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-print-dominance))" -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_condBranch func.func @func_condBranch(%cond : i1) { diff --git a/mlir/test/Analysis/test-foo-analysis.mlir b/mlir/test/Analysis/test-foo-analysis.mlir index 7c5d07396a83f..83ac5b5796135 100644 --- a/mlir/test/Analysis/test-foo-analysis.mlir +++ b/mlir/test/Analysis/test-foo-analysis.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -pass-pipeline='func.func(test-foo-analysis)' %s 2>&1 | FileCheck %s +// RUN: mlir-opt -split-input-file -pass-pipeline='builtin.module(func.func(test-foo-analysis))' %s 2>&1 | FileCheck %s // CHECK-LABEL: function: @test_default_init func.func @test_default_init() -> () { diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir index 02179d6472d54..8ae3d09a6cd12 100644 --- a/mlir/test/Analysis/test-liveness.mlir +++ b/mlir/test/Analysis/test-liveness.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-liveness)" -split-input-file 2>&1 | FileCheck %s +// RUN: mlir-opt %s 
-pass-pipeline="builtin.module(func.func(test-print-liveness))" -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_empty func.func @func_empty() { diff --git a/mlir/test/Analysis/test-match-reduction.mlir b/mlir/test/Analysis/test-match-reduction.mlir index ecc74c6cc686c..b5902db77e899 100644 --- a/mlir/test/Analysis/test-match-reduction.mlir +++ b/mlir/test/Analysis/test-match-reduction.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-match-reduction)" -verify-diagnostics -split-input-file +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-match-reduction))" -verify-diagnostics -split-input-file // Verify that the generic reduction detection utility works on different // dialects. diff --git a/mlir/test/Analysis/test-topoligical-sort.mlir b/mlir/test/Analysis/test-topoligical-sort.mlir index 9323c486f8a11..8608586402055 100644 --- a/mlir/test/Analysis/test-topoligical-sort.mlir +++ b/mlir/test/Analysis/test-topoligical-sort.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-print-topological-sort)" 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-print-topological-sort))" 2>&1 | FileCheck %s // CHECK-LABEL: Testing : region // CHECK: arith.addi {{.*}} : index diff --git a/mlir/test/CAPI/pass.c b/mlir/test/CAPI/pass.c index 5b04d749b1cdc..87430b9e47978 100644 --- a/mlir/test/CAPI/pass.c +++ b/mlir/test/CAPI/pass.c @@ -182,7 +182,8 @@ void testParsePassPipeline() { MlirLogicalResult status = mlirParsePassPipeline( mlirPassManagerGetAsOpPassManager(pm), mlirStringRefCreateFromCString( - "builtin.module(func.func(print-op-stats{json=false}))")); + "builtin.module(func.func(print-op-stats{json=false}))"), + printToStderr, NULL); // Expect a failure, we haven't registered the print-op-stats pass yet. 
if (mlirLogicalResultIsSuccess(status)) { fprintf( @@ -195,7 +196,8 @@ void testParsePassPipeline() { status = mlirParsePassPipeline( mlirPassManagerGetAsOpPassManager(pm), mlirStringRefCreateFromCString( - "builtin.module(func.func(print-op-stats{json=false}))")); + "builtin.module(func.func(print-op-stats{json=false}))"), + printToStderr, NULL); // Expect a failure, we haven't registered the print-op-stats pass yet. if (mlirLogicalResultIsFailure(status)) { fprintf(stderr, @@ -203,9 +205,7 @@ void testParsePassPipeline() { exit(EXIT_FAILURE); } - // CHECK: Round-trip: builtin.module( - // CHECK-SAME: builtin.module(func.func(print-op-stats{json=false})) - // CHECK-SAME: ) + // CHECK: Round-trip: builtin.module(func.func(print-op-stats{json=false})) fprintf(stderr, "Round-trip: "); mlirPrintPassPipeline(mlirPassManagerGetAsOpPassManager(pm), printToStderr, NULL); @@ -221,7 +221,7 @@ void testParsePassPipeline() { exit(EXIT_FAILURE); } // CHECK: Appended: builtin.module( - // CHECK-SAME: builtin.module(func.func(print-op-stats{json=false})), + // CHECK-SAME: func.func(print-op-stats{json=false}), // CHECK-SAME: func.func(print-op-stats{json=false}) // CHECK-SAME: ) fprintf(stderr, "Appended: "); @@ -242,6 +242,14 @@ void testParseErrorCapture() { MlirOpPassManager opm = mlirPassManagerGetAsOpPassManager(pm); MlirStringRef invalidPipeline = mlirStringRefCreateFromCString("invalid"); + // CHECK: mlirParsePassPipeline: + // CHECK: expected pass pipeline to be wrapped with the anchor operation type + fprintf(stderr, "mlirParsePassPipeline:\n"); + if (mlirLogicalResultIsSuccess( + mlirParsePassPipeline(opm, invalidPipeline, printToStderr, NULL))) + exit(EXIT_FAILURE); + fprintf(stderr, "\n"); + // CHECK: mlirOpPassManagerAddPipeline: // CHECK: 'invalid' does not refer to a registered pass or pass pipeline fprintf(stderr, "mlirOpPassManagerAddPipeline:\n"); @@ -253,6 +261,9 @@ void testParseErrorCapture() { // Make sure all output is going through the callback. 
// CHECK: dontPrint: <> fprintf(stderr, "dontPrint: <"); + if (mlirLogicalResultIsSuccess( + mlirParsePassPipeline(opm, invalidPipeline, dontPrint, NULL))) + exit(EXIT_FAILURE); if (mlirLogicalResultIsSuccess( mlirOpPassManagerAddPipeline(opm, invalidPipeline, dontPrint, NULL))) exit(EXIT_FAILURE); diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir index 382c3359278e0..d88f71eac4458 100644 --- a/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine-gpu.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="gpu.module(lower-affine)" %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(gpu.module(lower-affine))" %s | FileCheck %s #map0gpufunc = affine_map<(d0) -> (d0)> gpu.module @kernels { diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index 81f402195fb4f..d8e49a55c2ad7 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm))" %s -split-input-file | FileCheck %s // CHECK-LABEL: @vector_ops func.func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> { @@ -429,22 +429,22 @@ func.func @select(%arg0 : i1, %arg1 : i32, %arg2 : i32) -> i32 { // CHECK-LABEL: @minmaxi func.func @minmaxi(%arg0 : i32, %arg1 : i32) -> i32 { - // CHECK: = "llvm.intr.smin"(%arg0, %arg1) : (i32, i32) -> i32 + // CHECK: = llvm.intr.smin(%arg0, %arg1) : (i32, i32) -> i32 %0 = arith.minsi %arg0, %arg1 : i32 - // CHECK: = "llvm.intr.smax"(%arg0, %arg1) : (i32, i32) -> i32 + // CHECK: = llvm.intr.smax(%arg0, %arg1) : (i32, i32) -> i32 %1 = 
arith.maxsi %arg0, %arg1 : i32 - // CHECK: = "llvm.intr.umin"(%arg0, %arg1) : (i32, i32) -> i32 + // CHECK: = llvm.intr.umin(%arg0, %arg1) : (i32, i32) -> i32 %2 = arith.minui %arg0, %arg1 : i32 - // CHECK: = "llvm.intr.umax"(%arg0, %arg1) : (i32, i32) -> i32 + // CHECK: = llvm.intr.umax(%arg0, %arg1) : (i32, i32) -> i32 %3 = arith.maxui %arg0, %arg1 : i32 return %0 : i32 } // CHECK-LABEL: @minmaxf func.func @minmaxf(%arg0 : f32, %arg1 : f32) -> f32 { - // CHECK: = "llvm.intr.minnum"(%arg0, %arg1) : (f32, f32) -> f32 + // CHECK: = llvm.intr.minnum(%arg0, %arg1) : (f32, f32) -> f32 %0 = arith.minf %arg0, %arg1 : f32 - // CHECK: = "llvm.intr.maxnum"(%arg0, %arg1) : (f32, f32) -> f32 + // CHECK: = llvm.intr.maxnum(%arg0, %arg1) : (f32, f32) -> f32 %1 = arith.maxf %arg0, %arg1 : f32 return %0 : f32 } @@ -453,11 +453,11 @@ func.func @minmaxf(%arg0 : f32, %arg1 : f32) -> f32 { // CHECK-LABEL: @fastmath func.func @fastmath(%arg0: f32, %arg1: f32, %arg2: i32) { -// CHECK: {{.*}} = llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 -// CHECK: {{.*}} = llvm.fmul %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 -// CHECK: {{.*}} = llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath} : f32 -// CHECK: {{.*}} = llvm.fadd %arg0, %arg1 : f32 -// CHECK: {{.*}} = llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 +// CHECK: llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 +// CHECK: llvm.fmul %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 +// CHECK: llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath} : f32 +// CHECK: llvm.fadd %arg0, %arg1 : f32 +// CHECK: llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 %0 = arith.addf %arg0, %arg1 fastmath : f32 %1 = arith.mulf %arg0, %arg1 fastmath : f32 %2 = arith.negf %arg0 fastmath : f32 @@ -465,3 +465,26 @@ func.func @fastmath(%arg0: f32, %arg1: f32, %arg2: i32) { %4 = arith.addf %arg0, %arg1 fastmath : f32 return } + +// ----- + +// CHECK-LABEL: @ops_supporting_fastmath +func.func 
@ops_supporting_fastmath(%arg0: f32, %arg1: f32, %arg2: i32) { +// CHECK: llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %0 = arith.addf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.fdiv %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %1 = arith.divf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.intr.maxnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + %2 = arith.maxf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.intr.minnum(%arg0, %arg1) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + %3 = arith.minf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.fmul %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %4 = arith.mulf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath} : f32 + %5 = arith.negf %arg0 fastmath : f32 +// CHECK: llvm.frem %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %6 = arith.remf %arg0, %arg1 fastmath : f32 +// CHECK: llvm.fsub %arg0, %arg1 {fastmathFlags = #llvm.fastmath} : f32 + %7 = arith.subf %arg0, %arg1 fastmath : f32 + return +} diff --git a/mlir/test/Conversion/ArithToLLVM/convert-nd-vector-to-llvmir.mlir b/mlir/test/Conversion/ArithToLLVM/convert-nd-vector-to-llvmir.mlir index 7c219578baedd..63989347567b5 100644 --- a/mlir/test/Conversion/ArithToLLVM/convert-nd-vector-to-llvmir.mlir +++ b/mlir/test/Conversion/ArithToLLVM/convert-nd-vector-to-llvmir.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm)" %s -split-input-file | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm))" %s -split-input-file | FileCheck %s // CHECK-LABEL: @vec_bin func.func @vec_bin(%arg0: vector<2x2x2xf32>) -> vector<2x2x2xf32> { diff --git a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-unsupported.mlir b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-unsupported.mlir index 967adbc84a3bb..f6e84e80bbf51 100644 --- a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-unsupported.mlir +++ 
b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-unsupported.mlir @@ -49,7 +49,15 @@ func.func @int_vector4_invalid(%arg0: vector<2xi16>) { // ----- -func.func @unsupported_constant_0() { +func.func @unsupported_constant_i64_0() { + // expected-error @+1 {{failed to legalize operation 'arith.constant'}} + %0 = arith.constant 0 : i64 + return +} + +// ----- + +func.func @unsupported_constant_i64_1() { // expected-error @+1 {{failed to legalize operation 'arith.constant'}} %0 = arith.constant 4294967296 : i64 // 2^32 return @@ -57,16 +65,68 @@ func.func @unsupported_constant_0() { // ----- -func.func @unsupported_constant_1() { +func.func @unsupported_constant_vector_2xi64_0() { + // expected-error @+1 {{failed to legalize operation 'arith.constant'}} + %1 = arith.constant dense<0> : vector<2xi64> + return +} + +// ----- + +func.func @unsupported_constant_f64_0() { // expected-error @+1 {{failed to legalize operation 'arith.constant'}} - %1 = arith.constant -2147483649 : i64 // -2^31 - 1 + %1 = arith.constant 0.0 : f64 return } // ----- -func.func @unsupported_constant_2() { +func.func @unsupported_constant_vector_2xf64_0() { // expected-error @+1 {{failed to legalize operation 'arith.constant'}} - %2 = arith.constant -2147483649 : i64 // -2^31 - 1 + %1 = arith.constant dense<0.0> : vector<2xf64> return } + +// ----- + +func.func @unsupported_constant_tensor_2xf64_0() { + // expected-error @+1 {{failed to legalize operation 'arith.constant'}} + %1 = arith.constant dense<0.0> : tensor<2xf64> + return +} + +//===----------------------------------------------------------------------===// +// Type emulation +//===----------------------------------------------------------------------===// + +// ----- + +module attributes { + spirv.target_env = #spirv.target_env< + #spirv.vce, #spirv.resource_limits<>> +} { + +// Check that we do not emulate i64 by truncating to i32.
+func.func @unsupported_i64(%arg0: i64) { + // expected-error@+1 {{failed to legalize operation 'arith.addi'}} + %2 = arith.addi %arg0, %arg0: i64 + return +} + +} // end module + +// ----- + +module attributes { + spirv.target_env = #spirv.target_env< + #spirv.vce, #spirv.resource_limits<>> +} { + +// Check that we do not emulate f64 by truncating to f32. +func.func @unsupported_f64(%arg0: f64) { + // expected-error@+1 {{failed to legalize operation 'arith.addf'}} + %2 = arith.addf %arg0, %arg0: f64 + return +} + +} // end module diff --git a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir index df6806a0e4bd1..2f7fb592c896a 100644 --- a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir +++ b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv.mlir @@ -282,6 +282,15 @@ func.func @cmpf(%arg0 : f32, %arg1 : f32) { return } +// CHECK-LABEL: @vec1cmpf +func.func @vec1cmpf(%arg0 : vector<1xf32>, %arg1 : vector<1xf32>) { + // CHECK: spirv.FOrdGreaterThan + %0 = arith.cmpf ogt, %arg0, %arg1 : vector<1xf32> + // CHECK: spirv.FUnordLessThan + %1 = arith.cmpf ult, %arg0, %arg1 : vector<1xf32> + return +} + } // end module // ----- @@ -466,9 +475,9 @@ func.func @constant() { // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> %8 = arith.constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %9 = arith.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi32> + %9 = arith.constant dense<[[1, 2], [3, 4], [5, 6]]> : tensor<3x2xi32> // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %10 = arith.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> + %10 = arith.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> return } @@ -513,7 +522,7 @@ func.func @constant_size1() { // ----- -// Check that constants are converted to 32-bit when
no special capability. +// Check that constants are widened to 32-bit when no special capability. module attributes { spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> } { @@ -533,51 +542,26 @@ func.func @constant_16bit() { return } -// CHECK-LABEL: @constant_64bit -func.func @constant_64bit() { - // CHECK: spirv.Constant 4 : i32 - %0 = arith.constant 4 : i64 - // CHECK: spirv.Constant 5.000000e+00 : f32 - %1 = arith.constant 5.0 : f64 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi32> - %2 = arith.constant dense<[2, 3]> : vector<2xi64> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf32> : !spirv.array<5 x f32> - %3 = arith.constant dense<4.0> : tensor<5xf64> - // CHECK: spirv.Constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xf32> : !spirv.array<4 x f32> - %4 = arith.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf16> - return -} - // CHECK-LABEL: @constant_size1 func.func @constant_size1() { // CHECK: spirv.Constant 4 : i32 - %0 = arith.constant dense<4> : vector<1xi64> + %0 = arith.constant dense<4> : vector<1xi16> // CHECK: spirv.Constant 5.000000e+00 : f32 - %1 = arith.constant dense<5.0> : tensor<1xf64> + %1 = arith.constant dense<5.0> : tensor<1xf16> return } // CHECK-LABEL: @corner_cases func.func @corner_cases() { - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %0 = arith.constant 4294967295 : i64 // 2^32 - 1 - // CHECK: %{{.*}} = spirv.Constant 2147483647 : i32 - %1 = arith.constant 2147483647 : i64 // 2^31 - 1 - // CHECK: %{{.*}} = spirv.Constant -2147483648 : i32 - %2 = arith.constant 2147483648 : i64 // 2^31 - // CHECK: %{{.*}} = spirv.Constant -2147483648 : i32 - %3 = arith.constant -2147483648 : i64 // -2^31 - - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %5 = arith.constant -1 : i64 + // CHECK: %{{.*}} = spirv.Constant -1 : i32 + %5 = arith.constant -1 : i16 // CHECK: %{{.*}} = spirv.Constant -2 : i32 - %6 = arith.constant -2 : i64 + %6 = arith.constant -2 : i16 // 
CHECK: %{{.*}} = spirv.Constant -1 : i32 %7 = arith.constant -1 : index // CHECK: %{{.*}} = spirv.Constant -2 : i32 %8 = arith.constant -2 : index - // CHECK: spirv.Constant false %9 = arith.constant false // CHECK: spirv.Constant true @@ -903,29 +887,13 @@ module attributes { } { // CHECK-LABEL: @fptrunc1 -// CHECK-SAME: %[[A:.*]]: f64 -func.func @fptrunc1(%arg0 : f64) -> f16 { - // CHECK: %[[ARG:.+]] = builtin.unrealized_conversion_cast %[[A]] : f64 to f32 - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f16 - %0 = arith.truncf %arg0 : f64 to f16 - return %0: f16 -} - -// CHECK-LABEL: @fptrunc2 // CHECK-SAME: %[[ARG:.*]]: f32 -func.func @fptrunc2(%arg0: f32) -> f16 { +func.func @fptrunc1(%arg0: f32) -> f16 { // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f16 %0 = arith.truncf %arg0 : f32 to f16 return %0: f16 } -// CHECK-LABEL: @sitofp -func.func @sitofp(%arg0 : i64) -> f64 { - // CHECK: spirv.ConvertSToF %{{.*}} : i32 to f32 - %0 = arith.sitofp %arg0 : i64 to f64 - return %0: f64 -} - } // end module // ----- @@ -1209,677 +1177,10 @@ func.func @int_vector23(%arg0: vector<2xi8>, %arg1: vector<3xi16>) { } // CHECK-LABEL: @float_scalar -func.func @float_scalar(%arg0: f16, %arg1: f64) { +func.func @float_scalar(%arg0: f16) { // CHECK: spirv.FAdd %{{.*}}, %{{.*}}: f32 %0 = arith.addf %arg0, %arg0: f16 - // CHECK: spirv.FMul %{{.*}}, %{{.*}}: f32 - %1 = arith.mulf %arg1, %arg1: f64 - return -} - -} // end module - -// ----- - -//===----------------------------------------------------------------------===// -// std bit ops -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @bitwise_scalar -func.func @bitwise_scalar(%arg0 : i32, %arg1 : i32) { - // CHECK: spirv.BitwiseAnd - %0 = arith.andi %arg0, %arg1 : i32 - // CHECK: spirv.BitwiseOr - %1 = arith.ori %arg0, %arg1 : i32 - // CHECK: spirv.BitwiseXor - %2 = 
arith.xori %arg0, %arg1 : i32 - return -} - -// CHECK-LABEL: @bitwise_vector -func.func @bitwise_vector(%arg0 : vector<4xi32>, %arg1 : vector<4xi32>) { - // CHECK: spirv.BitwiseAnd - %0 = arith.andi %arg0, %arg1 : vector<4xi32> - // CHECK: spirv.BitwiseOr - %1 = arith.ori %arg0, %arg1 : vector<4xi32> - // CHECK: spirv.BitwiseXor - %2 = arith.xori %arg0, %arg1 : vector<4xi32> - return -} - -// CHECK-LABEL: @logical_scalar -func.func @logical_scalar(%arg0 : i1, %arg1 : i1) { - // CHECK: spirv.LogicalAnd - %0 = arith.andi %arg0, %arg1 : i1 - // CHECK: spirv.LogicalOr - %1 = arith.ori %arg0, %arg1 : i1 - // CHECK: spirv.LogicalNotEqual - %2 = arith.xori %arg0, %arg1 : i1 - return -} - -// CHECK-LABEL: @logical_vector -func.func @logical_vector(%arg0 : vector<4xi1>, %arg1 : vector<4xi1>) { - // CHECK: spirv.LogicalAnd - %0 = arith.andi %arg0, %arg1 : vector<4xi1> - // CHECK: spirv.LogicalOr - %1 = arith.ori %arg0, %arg1 : vector<4xi1> - // CHECK: spirv.LogicalNotEqual - %2 = arith.xori %arg0, %arg1 : vector<4xi1> - return -} - -// CHECK-LABEL: @shift_scalar -func.func @shift_scalar(%arg0 : i32, %arg1 : i32) { - // CHECK: spirv.ShiftLeftLogical - %0 = arith.shli %arg0, %arg1 : i32 - // CHECK: spirv.ShiftRightArithmetic - %1 = arith.shrsi %arg0, %arg1 : i32 - // CHECK: spirv.ShiftRightLogical - %2 = arith.shrui %arg0, %arg1 : i32 - return -} - -// CHECK-LABEL: @shift_vector -func.func @shift_vector(%arg0 : vector<4xi32>, %arg1 : vector<4xi32>) { - // CHECK: spirv.ShiftLeftLogical - %0 = arith.shli %arg0, %arg1 : vector<4xi32> - // CHECK: spirv.ShiftRightArithmetic - %1 = arith.shrsi %arg0, %arg1 : vector<4xi32> - // CHECK: spirv.ShiftRightLogical - %2 = arith.shrui %arg0, %arg1 : vector<4xi32> - return -} - -} // end module - -// ----- - -//===----------------------------------------------------------------------===// -// arith.cmpf -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = 
#spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @cmpf -func.func @cmpf(%arg0 : f32, %arg1 : f32) { - // CHECK: spirv.FOrdEqual - %1 = arith.cmpf oeq, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdGreaterThan - %2 = arith.cmpf ogt, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdGreaterThanEqual - %3 = arith.cmpf oge, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdLessThan - %4 = arith.cmpf olt, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdLessThanEqual - %5 = arith.cmpf ole, %arg0, %arg1 : f32 - // CHECK: spirv.FOrdNotEqual - %6 = arith.cmpf one, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordEqual - %7 = arith.cmpf ueq, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordGreaterThan - %8 = arith.cmpf ugt, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordGreaterThanEqual - %9 = arith.cmpf uge, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordLessThan - %10 = arith.cmpf ult, %arg0, %arg1 : f32 - // CHECK: FUnordLessThanEqual - %11 = arith.cmpf ule, %arg0, %arg1 : f32 - // CHECK: spirv.FUnordNotEqual - %12 = arith.cmpf une, %arg0, %arg1 : f32 - return -} - -// CHECK-LABEL: @vec1cmpf -func.func @vec1cmpf(%arg0 : vector<1xf32>, %arg1 : vector<1xf32>) { - // CHECK: spirv.FOrdGreaterThan - %0 = arith.cmpf ogt, %arg0, %arg1 : vector<1xf32> - // CHECK: spirv.FUnordLessThan - %1 = arith.cmpf ult, %arg0, %arg1 : vector<1xf32> - return -} - -} // end module - -// ----- - -// With Kernel capability, we can convert NaN check to spirv.Ordered/spirv.Unordered. -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @cmpf -func.func @cmpf(%arg0 : f32, %arg1 : f32) { - // CHECK: spirv.Ordered - %0 = arith.cmpf ord, %arg0, %arg1 : f32 - // CHECK: spirv.Unordered - %1 = arith.cmpf uno, %arg0, %arg1 : f32 - return -} - -} // end module - -// ----- - -// Without Kernel capability, we need to convert NaN check to spirv.IsNan. 
-module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @cmpf -// CHECK-SAME: %[[LHS:.+]]: f32, %[[RHS:.+]]: f32 -func.func @cmpf(%arg0 : f32, %arg1 : f32) { - // CHECK: %[[LHS_NAN:.+]] = spirv.IsNan %[[LHS]] : f32 - // CHECK-NEXT: %[[RHS_NAN:.+]] = spirv.IsNan %[[RHS]] : f32 - // CHECK-NEXT: %[[OR:.+]] = spirv.LogicalOr %[[LHS_NAN]], %[[RHS_NAN]] : i1 - // CHECK-NEXT: %{{.+}} = spirv.LogicalNot %[[OR]] : i1 - %0 = arith.cmpf ord, %arg0, %arg1 : f32 - - // CHECK-NEXT: %[[LHS_NAN:.+]] = spirv.IsNan %[[LHS]] : f32 - // CHECK-NEXT: %[[RHS_NAN:.+]] = spirv.IsNan %[[RHS]] : f32 - // CHECK-NEXT: %{{.+}} = spirv.LogicalOr %[[LHS_NAN]], %[[RHS_NAN]] : i1 - %1 = arith.cmpf uno, %arg0, %arg1 : f32 return } } // end module - -// ----- - -//===----------------------------------------------------------------------===// -// arith.cmpi -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @cmpi -func.func @cmpi(%arg0 : i32, %arg1 : i32) { - // CHECK: spirv.IEqual - %0 = arith.cmpi eq, %arg0, %arg1 : i32 - // CHECK: spirv.INotEqual - %1 = arith.cmpi ne, %arg0, %arg1 : i32 - // CHECK: spirv.SLessThan - %2 = arith.cmpi slt, %arg0, %arg1 : i32 - // CHECK: spirv.SLessThanEqual - %3 = arith.cmpi sle, %arg0, %arg1 : i32 - // CHECK: spirv.SGreaterThan - %4 = arith.cmpi sgt, %arg0, %arg1 : i32 - // CHECK: spirv.SGreaterThanEqual - %5 = arith.cmpi sge, %arg0, %arg1 : i32 - // CHECK: spirv.ULessThan - %6 = arith.cmpi ult, %arg0, %arg1 : i32 - // CHECK: spirv.ULessThanEqual - %7 = arith.cmpi ule, %arg0, %arg1 : i32 - // CHECK: spirv.UGreaterThan - %8 = arith.cmpi ugt, %arg0, %arg1 : i32 - // CHECK: spirv.UGreaterThanEqual - %9 = arith.cmpi uge, %arg0, %arg1 : i32 - return -} - -// CHECK-LABEL: @boolcmpi -func.func @boolcmpi(%arg0 : i1, %arg1 : i1) { - // CHECK: 
spirv.LogicalEqual - %0 = arith.cmpi eq, %arg0, %arg1 : i1 - // CHECK: spirv.LogicalNotEqual - %1 = arith.cmpi ne, %arg0, %arg1 : i1 - return -} - -// CHECK-LABEL: @vecboolcmpi -func.func @vecboolcmpi(%arg0 : vector<4xi1>, %arg1 : vector<4xi1>) { - // CHECK: spirv.LogicalEqual - %0 = arith.cmpi eq, %arg0, %arg1 : vector<4xi1> - // CHECK: spirv.LogicalNotEqual - %1 = arith.cmpi ne, %arg0, %arg1 : vector<4xi1> - return -} - -} // end module - -// ----- - -//===----------------------------------------------------------------------===// -// arith.constant -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = #spirv.target_env< - #spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @constant -func.func @constant() { - // CHECK: spirv.Constant true - %0 = arith.constant true - // CHECK: spirv.Constant 42 : i32 - %1 = arith.constant 42 : i32 - // CHECK: spirv.Constant 5.000000e-01 : f32 - %2 = arith.constant 0.5 : f32 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi32> - %3 = arith.constant dense<[2, 3]> : vector<2xi32> - // CHECK: spirv.Constant 1 : i32 - %4 = arith.constant 1 : index - // CHECK: spirv.Constant dense<1> : tensor<6xi32> : !spirv.array<6 x i32> - %5 = arith.constant dense<1> : tensor<2x3xi32> - // CHECK: spirv.Constant dense<1.000000e+00> : tensor<6xf32> : !spirv.array<6 x f32> - %6 = arith.constant dense<1.0> : tensor<2x3xf32> - // CHECK: spirv.Constant dense<{{\[}}1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00, 6.000000e+00]> : tensor<6xf32> : !spirv.array<6 x f32> - %7 = arith.constant dense<[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]> : tensor<2x3xf32> - // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %8 = arith.constant dense<[[1, 2, 3], [4, 5, 6]]> : tensor<2x3xi32> - // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %9 = arith.constant dense<[[1, 2], [3, 
4], [5, 6]]> : tensor<3x2xi32> - // CHECK: spirv.Constant dense<{{\[}}1, 2, 3, 4, 5, 6]> : tensor<6xi32> : !spirv.array<6 x i32> - %10 = arith.constant dense<[1, 2, 3, 4, 5, 6]> : tensor<6xi32> - return -} - -// CHECK-LABEL: @constant_16bit -func.func @constant_16bit() { - // CHECK: spirv.Constant 4 : i16 - %0 = arith.constant 4 : i16 - // CHECK: spirv.Constant 5.000000e+00 : f16 - %1 = arith.constant 5.0 : f16 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi16> - %2 = arith.constant dense<[2, 3]> : vector<2xi16> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf16> : !spirv.array<5 x f16> - %3 = arith.constant dense<4.0> : tensor<5xf16> - return -} - -// CHECK-LABEL: @constant_64bit -func.func @constant_64bit() { - // CHECK: spirv.Constant 4 : i64 - %0 = arith.constant 4 : i64 - // CHECK: spirv.Constant 5.000000e+00 : f64 - %1 = arith.constant 5.0 : f64 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi64> - %2 = arith.constant dense<[2, 3]> : vector<2xi64> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf64> : !spirv.array<5 x f64> - %3 = arith.constant dense<4.0> : tensor<5xf64> - return -} - -} // end module - -// ----- - -// Check that constants are converted to 32-bit when no special capability. 
-module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @constant_16bit -func.func @constant_16bit() { - // CHECK: spirv.Constant 4 : i32 - %0 = arith.constant 4 : i16 - // CHECK: spirv.Constant 5.000000e+00 : f32 - %1 = arith.constant 5.0 : f16 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi32> - %2 = arith.constant dense<[2, 3]> : vector<2xi16> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf32> : !spirv.array<5 x f32> - %3 = arith.constant dense<4.0> : tensor<5xf16> - // CHECK: spirv.Constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xf32> : !spirv.array<4 x f32> - %4 = arith.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf16> - return -} - -// CHECK-LABEL: @constant_64bit -func.func @constant_64bit() { - // CHECK: spirv.Constant 4 : i32 - %0 = arith.constant 4 : i64 - // CHECK: spirv.Constant 5.000000e+00 : f32 - %1 = arith.constant 5.0 : f64 - // CHECK: spirv.Constant dense<[2, 3]> : vector<2xi32> - %2 = arith.constant dense<[2, 3]> : vector<2xi64> - // CHECK: spirv.Constant dense<4.000000e+00> : tensor<5xf32> : !spirv.array<5 x f32> - %3 = arith.constant dense<4.0> : tensor<5xf64> - // CHECK: spirv.Constant dense<[1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00]> : tensor<4xf32> : !spirv.array<4 x f32> - %4 = arith.constant dense<[[1.0, 2.0], [3.0, 4.0]]> : tensor<2x2xf16> - return -} - -// CHECK-LABEL: @corner_cases -func.func @corner_cases() { - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %0 = arith.constant 4294967295 : i64 // 2^32 - 1 - // CHECK: %{{.*}} = spirv.Constant 2147483647 : i32 - %1 = arith.constant 2147483647 : i64 // 2^31 - 1 - // CHECK: %{{.*}} = spirv.Constant -2147483648 : i32 - %2 = arith.constant 2147483648 : i64 // 2^31 - // CHECK: %{{.*}} = spirv.Constant -2147483648 : i32 - %3 = arith.constant -2147483648 : i64 // -2^31 - - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %5 = arith.constant -1 : i64 - // 
CHECK: %{{.*}} = spirv.Constant -2 : i32 - %6 = arith.constant -2 : i64 - // CHECK: %{{.*}} = spirv.Constant -1 : i32 - %7 = arith.constant -1 : index - // CHECK: %{{.*}} = spirv.Constant -2 : i32 - %8 = arith.constant -2 : index - - - // CHECK: spirv.Constant false - %9 = arith.constant false - // CHECK: spirv.Constant true - %10 = arith.constant true - - return -} - -} // end module - -// ----- - -//===----------------------------------------------------------------------===// -// std cast ops -//===----------------------------------------------------------------------===// - -module attributes { - spirv.target_env = #spirv.target_env< - #spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: index_cast1 -func.func @index_cast1(%arg0: i16) { - // CHECK: spirv.SConvert %{{.+}} : i16 to i32 - %0 = arith.index_cast %arg0 : i16 to index - return -} - -// CHECK-LABEL: index_cast2 -func.func @index_cast2(%arg0: index) { - // CHECK: spirv.SConvert %{{.+}} : i32 to i16 - %0 = arith.index_cast %arg0 : index to i16 - return -} - -// CHECK-LABEL: index_cast3 -func.func @index_cast3(%arg0: i32) { - // CHECK-NOT: spirv.SConvert - %0 = arith.index_cast %arg0 : i32 to index - return -} - -// CHECK-LABEL: index_cast4 -func.func @index_cast4(%arg0: index) { - // CHECK-NOT: spirv.SConvert - %0 = arith.index_cast %arg0 : index to i32 - return -} - -// CHECK-LABEL: @fpext1 -func.func @fpext1(%arg0: f16) -> f64 { - // CHECK: spirv.FConvert %{{.*}} : f16 to f64 - %0 = arith.extf %arg0 : f16 to f64 - return %0 : f64 -} - -// CHECK-LABEL: @fpext2 -func.func @fpext2(%arg0 : f32) -> f64 { - // CHECK: spirv.FConvert %{{.*}} : f32 to f64 - %0 = arith.extf %arg0 : f32 to f64 - return %0 : f64 -} - -// CHECK-LABEL: @fptrunc1 -func.func @fptrunc1(%arg0 : f64) -> f16 { - // CHECK: spirv.FConvert %{{.*}} : f64 to f16 - %0 = arith.truncf %arg0 : f64 to f16 - return %0 : f16 -} - -// CHECK-LABEL: @fptrunc2 -func.func @fptrunc2(%arg0: f32) -> f16 { - // CHECK: spirv.FConvert %{{.*}} : f32 to 
f16 - %0 = arith.truncf %arg0 : f32 to f16 - return %0 : f16 -} - -// CHECK-LABEL: @sitofp1 -func.func @sitofp1(%arg0 : i32) -> f32 { - // CHECK: spirv.ConvertSToF %{{.*}} : i32 to f32 - %0 = arith.sitofp %arg0 : i32 to f32 - return %0 : f32 -} - -// CHECK-LABEL: @sitofp2 -func.func @sitofp2(%arg0 : i64) -> f64 { - // CHECK: spirv.ConvertSToF %{{.*}} : i64 to f64 - %0 = arith.sitofp %arg0 : i64 to f64 - return %0 : f64 -} - -// CHECK-LABEL: @uitofp_i16_f32 -func.func @uitofp_i16_f32(%arg0: i16) -> f32 { - // CHECK: spirv.ConvertUToF %{{.*}} : i16 to f32 - %0 = arith.uitofp %arg0 : i16 to f32 - return %0 : f32 -} - -// CHECK-LABEL: @uitofp_i32_f32 -func.func @uitofp_i32_f32(%arg0 : i32) -> f32 { - // CHECK: spirv.ConvertUToF %{{.*}} : i32 to f32 - %0 = arith.uitofp %arg0 : i32 to f32 - return %0 : f32 -} - -// CHECK-LABEL: @uitofp_i1_f32 -func.func @uitofp_i1_f32(%arg0 : i1) -> f32 { - // CHECK: %[[ZERO:.+]] = spirv.Constant 0.000000e+00 : f32 - // CHECK: %[[ONE:.+]] = spirv.Constant 1.000000e+00 : f32 - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : i1, f32 - %0 = arith.uitofp %arg0 : i1 to f32 - return %0 : f32 -} - -// CHECK-LABEL: @uitofp_i1_f64 -func.func @uitofp_i1_f64(%arg0 : i1) -> f64 { - // CHECK: %[[ZERO:.+]] = spirv.Constant 0.000000e+00 : f64 - // CHECK: %[[ONE:.+]] = spirv.Constant 1.000000e+00 : f64 - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : i1, f64 - %0 = arith.uitofp %arg0 : i1 to f64 - return %0 : f64 -} - -// CHECK-LABEL: @uitofp_vec_i1_f32 -func.func @uitofp_vec_i1_f32(%arg0 : vector<4xi1>) -> vector<4xf32> { - // CHECK: %[[ZERO:.+]] = spirv.Constant dense<0.000000e+00> : vector<4xf32> - // CHECK: %[[ONE:.+]] = spirv.Constant dense<1.000000e+00> : vector<4xf32> - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : vector<4xi1>, vector<4xf32> - %0 = arith.uitofp %arg0 : vector<4xi1> to vector<4xf32> - return %0 : vector<4xf32> -} - -// CHECK-LABEL: @uitofp_vec_i1_f64 -spirv.func @uitofp_vec_i1_f64(%arg0: vector<4xi1>) -> 
vector<4xf64> "None" { - // CHECK: %[[ZERO:.+]] = spirv.Constant dense<0.000000e+00> : vector<4xf64> - // CHECK: %[[ONE:.+]] = spirv.Constant dense<1.000000e+00> : vector<4xf64> - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : vector<4xi1>, vector<4xf64> - %0 = spirv.Constant dense<0.000000e+00> : vector<4xf64> - %1 = spirv.Constant dense<1.000000e+00> : vector<4xf64> - %2 = spirv.Select %arg0, %1, %0 : vector<4xi1>, vector<4xf64> - spirv.ReturnValue %2 : vector<4xf64> -} - -// CHECK-LABEL: @sexti1 -func.func @sexti1(%arg0: i16) -> i64 { - // CHECK: spirv.SConvert %{{.*}} : i16 to i64 - %0 = arith.extsi %arg0 : i16 to i64 - return %0 : i64 -} - -// CHECK-LABEL: @sexti2 -func.func @sexti2(%arg0 : i32) -> i64 { - // CHECK: spirv.SConvert %{{.*}} : i32 to i64 - %0 = arith.extsi %arg0 : i32 to i64 - return %0 : i64 -} - -// CHECK-LABEL: @zexti1 -func.func @zexti1(%arg0: i16) -> i64 { - // CHECK: spirv.UConvert %{{.*}} : i16 to i64 - %0 = arith.extui %arg0 : i16 to i64 - return %0 : i64 -} - -// CHECK-LABEL: @zexti2 -func.func @zexti2(%arg0 : i32) -> i64 { - // CHECK: spirv.UConvert %{{.*}} : i32 to i64 - %0 = arith.extui %arg0 : i32 to i64 - return %0 : i64 -} - -// CHECK-LABEL: @zexti3 -func.func @zexti3(%arg0 : i1) -> i32 { - // CHECK: %[[ZERO:.+]] = spirv.Constant 0 : i32 - // CHECK: %[[ONE:.+]] = spirv.Constant 1 : i32 - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : i1, i32 - %0 = arith.extui %arg0 : i1 to i32 - return %0 : i32 -} - -// CHECK-LABEL: @zexti4 -func.func @zexti4(%arg0 : vector<4xi1>) -> vector<4xi32> { - // CHECK: %[[ZERO:.+]] = spirv.Constant dense<0> : vector<4xi32> - // CHECK: %[[ONE:.+]] = spirv.Constant dense<1> : vector<4xi32> - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : vector<4xi1>, vector<4xi32> - %0 = arith.extui %arg0 : vector<4xi1> to vector<4xi32> - return %0 : vector<4xi32> -} - -// CHECK-LABEL: @zexti5 -func.func @zexti5(%arg0 : vector<4xi1>) -> vector<4xi64> { - // CHECK: %[[ZERO:.+]] = spirv.Constant dense<0> 
: vector<4xi64> - // CHECK: %[[ONE:.+]] = spirv.Constant dense<1> : vector<4xi64> - // CHECK: spirv.Select %{{.*}}, %[[ONE]], %[[ZERO]] : vector<4xi1>, vector<4xi64> - %0 = arith.extui %arg0 : vector<4xi1> to vector<4xi64> - return %0 : vector<4xi64> -} - -// CHECK-LABEL: @trunci1 -func.func @trunci1(%arg0 : i64) -> i16 { - // CHECK: spirv.SConvert %{{.*}} : i64 to i16 - %0 = arith.trunci %arg0 : i64 to i16 - return %0 : i16 -} - -// CHECK-LABEL: @trunci2 -func.func @trunci2(%arg0: i32) -> i16 { - // CHECK: spirv.SConvert %{{.*}} : i32 to i16 - %0 = arith.trunci %arg0 : i32 to i16 - return %0 : i16 -} - -// CHECK-LABEL: @trunc_to_i1 -func.func @trunc_to_i1(%arg0: i32) -> i1 { - // CHECK: %[[MASK:.*]] = spirv.Constant 1 : i32 - // CHECK: %[[MASKED_SRC:.*]] = spirv.BitwiseAnd %{{.*}}, %[[MASK]] : i32 - // CHECK: %[[IS_ONE:.*]] = spirv.IEqual %[[MASKED_SRC]], %[[MASK]] : i32 - // CHECK-DAG: %[[TRUE:.*]] = spirv.Constant true - // CHECK-DAG: %[[FALSE:.*]] = spirv.Constant false - // CHECK: spirv.Select %[[IS_ONE]], %[[TRUE]], %[[FALSE]] : i1, i1 - %0 = arith.trunci %arg0 : i32 to i1 - return %0 : i1 -} - -// CHECK-LABEL: @trunc_to_veci1 -func.func @trunc_to_veci1(%arg0: vector<4xi32>) -> vector<4xi1> { - // CHECK: %[[MASK:.*]] = spirv.Constant dense<1> : vector<4xi32> - // CHECK: %[[MASKED_SRC:.*]] = spirv.BitwiseAnd %{{.*}}, %[[MASK]] : vector<4xi32> - // CHECK: %[[IS_ONE:.*]] = spirv.IEqual %[[MASKED_SRC]], %[[MASK]] : vector<4xi32> - // CHECK-DAG: %[[TRUE:.*]] = spirv.Constant dense : vector<4xi1> - // CHECK-DAG: %[[FALSE:.*]] = spirv.Constant dense : vector<4xi1> - // CHECK: spirv.Select %[[IS_ONE]], %[[TRUE]], %[[FALSE]] : vector<4xi1>, vector<4xi1> - %0 = arith.trunci %arg0 : vector<4xi32> to vector<4xi1> - return %0 : vector<4xi1> -} - -// CHECK-LABEL: @fptosi1 -func.func @fptosi1(%arg0 : f32) -> i32 { - // CHECK: spirv.ConvertFToS %{{.*}} : f32 to i32 - %0 = arith.fptosi %arg0 : f32 to i32 - return %0 : i32 -} - -// CHECK-LABEL: @fptosi2 -func.func 
@fptosi2(%arg0 : f16) -> i16 { - // CHECK: spirv.ConvertFToS %{{.*}} : f16 to i16 - %0 = arith.fptosi %arg0 : f16 to i16 - return %0 : i16 -} - -} // end module - -// ----- - -// Checks that cast types will be adjusted when missing special capabilities for -// certain non-32-bit scalar types. -module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @fpext1 -// CHECK-SAME: %[[A:.*]]: f16 -func.func @fpext1(%arg0: f16) -> f64 { - // CHECK: %[[ARG:.+]] = builtin.unrealized_conversion_cast %[[A]] : f16 to f32 - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f64 - %0 = arith.extf %arg0 : f16 to f64 - return %0: f64 -} - -// CHECK-LABEL: @fpext2 -// CHECK-SAME: %[[ARG:.*]]: f32 -func.func @fpext2(%arg0 : f32) -> f64 { - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f64 - %0 = arith.extf %arg0 : f32 to f64 - return %0: f64 -} - -} // end module - -// ----- - -// Checks that cast types will be adjusted when missing special capabilities for -// certain non-32-bit scalar types. 
-module attributes { - spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> -} { - -// CHECK-LABEL: @fptrunc1 -// CHECK-SAME: %[[A:.*]]: f64 -func.func @fptrunc1(%arg0 : f64) -> f16 { - // CHECK: %[[ARG:.+]] = builtin.unrealized_conversion_cast %[[A]] : f64 to f32 - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f16 - %0 = arith.truncf %arg0 : f64 to f16 - return %0: f16 -} - -// CHECK-LABEL: @fptrunc2 -// CHECK-SAME: %[[ARG:.*]]: f32 -func.func @fptrunc2(%arg0: f32) -> f16 { - // CHECK-NEXT: spirv.FConvert %[[ARG]] : f32 to f16 - %0 = arith.truncf %arg0 : f32 to f16 - return %0: f16 -} - -// CHECK-LABEL: @sitofp -func.func @sitofp(%arg0 : i64) -> f64 { - // CHECK: spirv.ConvertSToF %{{.*}} : i32 to f32 - %0 = arith.sitofp %arg0 : i64 to f64 - return %0: f64 -} - -} // end module - -// ----- diff --git a/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir index 32409ca242c28..be46c22155a6c 100644 --- a/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir +++ b/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir @@ -147,6 +147,6 @@ func.func @complex_abs(%arg: complex) -> f32 { // CHECK-DAG: %[[REAL_SQ:.*]] = llvm.fmul %[[REAL]], %[[REAL]] : f32 // CHECK-DAG: %[[IMAG_SQ:.*]] = llvm.fmul %[[IMAG]], %[[IMAG]] : f32 // CHECK: %[[SQ_NORM:.*]] = llvm.fadd %[[REAL_SQ]], %[[IMAG_SQ]] : f32 -// CHECK: %[[NORM:.*]] = "llvm.intr.sqrt"(%[[SQ_NORM]]) : (f32) -> f32 +// CHECK: %[[NORM:.*]] = llvm.intr.sqrt(%[[SQ_NORM]]) : (f32) -> f32 // CHECK: return %[[NORM]] : f32 diff --git a/mlir/test/Conversion/ComplexToLLVM/full-conversion.mlir b/mlir/test/Conversion/ComplexToLLVM/full-conversion.mlir index d8bf45d752669..b7756b3be543f 100644 --- a/mlir/test/Conversion/ComplexToLLVM/full-conversion.mlir +++ b/mlir/test/Conversion/ComplexToLLVM/full-conversion.mlir @@ -66,6 +66,6 @@ func.func @complex_abs(%arg: complex) -> f32 { // CHECK-DAG: %[[REAL_SQ:.*]] = llvm.fmul %[[REAL]], %[[REAL]] : f32 // 
CHECK-DAG: %[[IMAG_SQ:.*]] = llvm.fmul %[[IMAG]], %[[IMAG]] : f32 // CHECK: %[[SQ_NORM:.*]] = llvm.fadd %[[REAL_SQ]], %[[IMAG_SQ]] : f32 -// CHECK: %[[NORM:.*]] = "llvm.intr.sqrt"(%[[SQ_NORM]]) : (f32) -> f32 +// CHECK: %[[NORM:.*]] = llvm.intr.sqrt(%[[SQ_NORM]]) : (f32) -> f32 // CHECK: llvm.return %[[NORM]] : f32 diff --git a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir index 5ff58240358e7..9983dd46f0943 100644 --- a/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir +++ b/mlir/test/Conversion/ComplexToStandard/full-conversion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-complex-to-standard),convert-complex-to-llvm,func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-complex-to-standard),convert-complex-to-llvm,func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" | FileCheck %s // CHECK-LABEL: llvm.func @complex_abs // CHECK-SAME: %[[ARG:.*]]: ![[C_TY:.*]]) @@ -11,7 +11,7 @@ func.func @complex_abs(%arg: complex) -> f32 { // CHECK-DAG: %[[REAL_SQ:.*]] = llvm.fmul %[[REAL]], %[[REAL]] : f32 // CHECK-DAG: %[[IMAG_SQ:.*]] = llvm.fmul %[[IMAG]], %[[IMAG]] : f32 // CHECK: %[[SQ_NORM:.*]] = llvm.fadd %[[REAL_SQ]], %[[IMAG_SQ]] : f32 -// CHECK: %[[NORM:.*]] = "llvm.intr.sqrt"(%[[SQ_NORM]]) : (f32) -> f32 +// CHECK: %[[NORM:.*]] = llvm.intr.sqrt(%[[SQ_NORM]]) : (f32) -> f32 // CHECK: llvm.return %[[NORM]] : f32 // CHECK-LABEL: llvm.func @complex_eq diff --git a/mlir/test/Conversion/FuncToLLVM/func-memref.mlir b/mlir/test/Conversion/FuncToLLVM/func-memref.mlir index 48a319b3ee283..10b205fc1a35d 100644 --- a/mlir/test/Conversion/FuncToLLVM/func-memref.mlir +++ b/mlir/test/Conversion/FuncToLLVM/func-memref.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt 
-pass-pipeline="func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" -split-input-file %s | FileCheck %s -// RUN: mlir-opt -pass-pipeline="func.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" -split-input-file %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts)" -split-input-file %s | FileCheck %s --check-prefix=BAREPTR // BAREPTR-LABEL: func @check_noalias // BAREPTR-SAME: %{{.*}}: !llvm.ptr {llvm.noalias}, %{{.*}}: !llvm.ptr {llvm.noalias} diff --git a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir index 7f4396b7b6d79..5624bf12e72d7 100644 --- a/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir +++ b/mlir/test/Conversion/FuncToLLVM/func-to-llvm.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" %s -split-input-file | FileCheck %s -// RUN: mlir-opt -pass-pipeline="func.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts" %s -split-input-file | FileCheck --check-prefix=CHECK32 %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-math-to-llvm,convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" %s -split-input-file | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-math-to-llvm,convert-arith-to-llvm{index-bitwidth=32}),convert-func-to-llvm{index-bitwidth=32},reconcile-unrealized-casts)" %s -split-input-file | FileCheck --check-prefix=CHECK32 %s // 
CHECK-LABEL: func @empty() { // CHECK-NEXT: llvm.return @@ -452,7 +452,7 @@ func.func @dfs_block_order(%arg0: i32) -> (i32) { // CHECK-LABEL: func @ceilf( // CHECK-SAME: f32 func.func @ceilf(%arg0 : f32) { - // CHECK: "llvm.intr.ceil"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.ceil(%arg0) : (f32) -> f32 %0 = math.ceil %arg0 : f32 func.return } @@ -462,7 +462,7 @@ func.func @ceilf(%arg0 : f32) { // CHECK-LABEL: func @floorf( // CHECK-SAME: f32 func.func @floorf(%arg0 : f32) { - // CHECK: "llvm.intr.floor"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.floor(%arg0) : (f32) -> f32 %0 = math.floor %arg0 : f32 func.return } @@ -503,9 +503,9 @@ func.func private @zero_result_func() // CHECK-SAME: %[[ARG0:.*]]: f32 // CHECK-SAME: %[[ARG1:.*]]: vector<4xf32> func.func @fmaf(%arg0: f32, %arg1: vector<4xf32>) { - // CHECK: %[[S:.*]] = "llvm.intr.fma"(%[[ARG0]], %[[ARG0]], %[[ARG0]]) : (f32, f32, f32) -> f32 + // CHECK: %[[S:.*]] = llvm.intr.fma(%[[ARG0]], %[[ARG0]], %[[ARG0]]) : (f32, f32, f32) -> f32 %0 = math.fma %arg0, %arg0, %arg0 : f32 - // CHECK: %[[V:.*]] = "llvm.intr.fma"(%[[ARG1]], %[[ARG1]], %[[ARG1]]) : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> + // CHECK: %[[V:.*]] = llvm.intr.fma(%[[ARG1]], %[[ARG1]], %[[ARG1]]) : (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> %1 = math.fma %arg1, %arg1, %arg1 : vector<4xf32> func.return } diff --git a/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir b/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir index 4f1cd09efec30..d207ecd71c3cb 100644 --- a/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir +++ b/mlir/test/Conversion/FuncToSPIRV/types-to-spirv.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt -split-input-file -convert-func-to-spirv %s -o - | FileCheck %s -// RUN: mlir-opt -split-input-file -convert-func-to-spirv="emulate-non-32-bit-scalar-types=false" %s -o - | FileCheck %s --check-prefix=NOEMU +// RUN: mlir-opt -split-input-file 
-convert-func-to-spirv="emulate-lt-32-bit-scalar-types=false" %s | \ +// RUN: FileCheck %s --check-prefix=NOEMU //===----------------------------------------------------------------------===// // Integer types @@ -15,7 +16,7 @@ module attributes { // CHECK-SAME: i32 // CHECK-SAME: si32 // CHECK-SAME: ui32 -// NOEMU-LABEL: func @integer8 +// NOEMU-LABEL: func.func @integer8 // NOEMU-SAME: i8 // NOEMU-SAME: si8 // NOEMU-SAME: ui8 @@ -25,16 +26,17 @@ func.func @integer8(%arg0: i8, %arg1: si8, %arg2: ui8) { return } // CHECK-SAME: i32 // CHECK-SAME: si32 // CHECK-SAME: ui32 -// NOEMU-LABEL: func @integer16 +// NOEMU-LABEL: func.func @integer16 // NOEMU-SAME: i16 // NOEMU-SAME: si16 // NOEMU-SAME: ui16 func.func @integer16(%arg0: i16, %arg1: si16, %arg2: ui16) { return } -// CHECK-LABEL: spirv.func @integer64 -// CHECK-SAME: i32 -// CHECK-SAME: si32 -// CHECK-SAME: ui32 +// We do not truncate 64-bit types to 32-bit ones. +// CHECK-LABEL: func.func @integer64 +// CHECK-SAME: i64 +// CHECK-SAME: si64 +// CHECK-SAME: ui64 // NOEMU-LABEL: func @integer64 // NOEMU-SAME: i64 // NOEMU-SAME: si64 @@ -131,13 +133,13 @@ module attributes { // CHECK-LABEL: spirv.func @float16 // CHECK-SAME: f32 -// NOEMU-LABEL: func @float16 +// NOEMU-LABEL: func.func @float16 // NOEMU-SAME: f16 func.func @float16(%arg0: f16) { return } -// CHECK-LABEL: spirv.func @float64 -// CHECK-SAME: f32 -// NOEMU-LABEL: func @float64 +// CHECK-LABEL: func.func @float64 +// CHECK-SAME: f64 +// NOEMU-LABEL: func.func @float64 // NOEMU-SAME: f64 func.func @float64(%arg0: f64) { return } @@ -184,7 +186,7 @@ func.func @bf16_type(%arg0: bf16) { return } //===----------------------------------------------------------------------===// // Check that capabilities for scalar types affects vector types too: no special -// capabilities available means using turning element types to 32-bit. +// capabilities available means widening element types to 32-bit. 
module attributes { spirv.target_env = #spirv.target_env<#spirv.vce, #spirv.resource_limits<>> } { @@ -192,19 +194,15 @@ module attributes { // CHECK-LABEL: spirv.func @int_vector // CHECK-SAME: vector<2xi32> // CHECK-SAME: vector<3xsi32> -// CHECK-SAME: vector<4xui32> func.func @int_vector( %arg0: vector<2xi8>, - %arg1: vector<3xsi16>, - %arg2: vector<4xui64> + %arg1: vector<3xsi16> ) { return } // CHECK-LABEL: spirv.func @float_vector // CHECK-SAME: vector<2xf32> -// CHECK-SAME: vector<3xf32> func.func @float_vector( - %arg0: vector<2xf16>, - %arg1: vector<3xf64> + %arg0: vector<2xf16> ) { return } // CHECK-LABEL: spirv.func @one_element_vector @@ -389,33 +387,35 @@ func.func @memref_16bit_Input(%arg3: memref<16xf16, #spirv.storage_class> // NOEMU-SAME: memref<16xf16, #spirv.storage_class> func.func @memref_16bit_Output(%arg4: memref<16xf16, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_StorageBuffer -// CHECK-SAME: !spirv.ptr [0])>, StorageBuffer> -// NOEMU-LABEL: func @memref_64bit_StorageBuffer +// We do not truncate i64 to i32. 
+ +// CHECK-LABEL: func.func @memref_64bit_StorageBuffer +// CHECK-SAME: memref<16xi64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_StorageBuffer // NOEMU-SAME: memref<16xi64, #spirv.storage_class> func.func @memref_64bit_StorageBuffer(%arg0: memref<16xi64, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_Uniform -// CHECK-SAME: !spirv.ptr [0])>, Uniform> -// NOEMU-LABEL: func @memref_64bit_Uniform +// CHECK-LABEL: func.func @memref_64bit_Uniform +// CHECK-SAME: memref<16xsi64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_Uniform // NOEMU-SAME: memref<16xsi64, #spirv.storage_class> func.func @memref_64bit_Uniform(%arg0: memref<16xsi64, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_PushConstant -// CHECK-SAME: !spirv.ptr [0])>, PushConstant> -// NOEMU-LABEL: func @memref_64bit_PushConstant +// CHECK-LABEL: func.func @memref_64bit_PushConstant +// CHECK-SAME: memref<16xui64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_PushConstant // NOEMU-SAME: memref<16xui64, #spirv.storage_class> func.func @memref_64bit_PushConstant(%arg0: memref<16xui64, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_Input -// CHECK-SAME: !spirv.ptr)>, Input> -// NOEMU-LABEL: func @memref_64bit_Input +// CHECK-LABEL: func.func @memref_64bit_Input +// CHECK-SAME: memref<16xf64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_Input // NOEMU-SAME: memref<16xf64, #spirv.storage_class> func.func @memref_64bit_Input(%arg3: memref<16xf64, #spirv.storage_class>) { return } -// CHECK-LABEL: spirv.func @memref_64bit_Output -// CHECK-SAME: !spirv.ptr)>, Output> -// NOEMU-LABEL: func @memref_64bit_Output +// CHECK-LABEL: func.func @memref_64bit_Output +// CHECK-SAME: memref<16xf64, #spirv.storage_class> +// NOEMU-LABEL: func.func @memref_64bit_Output // NOEMU-SAME: memref<16xf64, #spirv.storage_class> func.func @memref_64bit_Output(%arg4: memref<16xf64, 
#spirv.storage_class>) { return } @@ -791,9 +791,7 @@ module attributes { // CHECK-SAME: !spirv.array<32 x i32> // CHECK-SAME: !spirv.array<32 x i32> // CHECK-SAME: !spirv.array<32 x i32> -// CHECK-SAME: !spirv.array<32 x i32> func.func @int_tensor_types( - %arg0: tensor<8x4xi64>, %arg1: tensor<8x4xi32>, %arg2: tensor<8x4xi16>, %arg3: tensor<8x4xi8> @@ -802,9 +800,7 @@ func.func @int_tensor_types( // CHECK-LABEL: spirv.func @float_tensor_types // CHECK-SAME: !spirv.array<32 x f32> // CHECK-SAME: !spirv.array<32 x f32> -// CHECK-SAME: !spirv.array<32 x f32> func.func @float_tensor_types( - %arg0: tensor<8x4xf64>, %arg1: tensor<8x4xf32>, %arg2: tensor<8x4xf16> ) { return } diff --git a/mlir/test/Conversion/IndexToLLVM/index-to-llvm.mlir b/mlir/test/Conversion/IndexToLLVM/index-to-llvm.mlir index ee8e6629aa719..c6b2273fa1f3f 100644 --- a/mlir/test/Conversion/IndexToLLVM/index-to-llvm.mlir +++ b/mlir/test/Conversion/IndexToLLVM/index-to-llvm.mlir @@ -22,8 +22,14 @@ func.func @trivial_ops(%a: index, %b: index) { %7 = index.maxs %a, %b // CHECK: llvm.intr.umax %8 = index.maxu %a, %b + // CHECK: llvm.shl + %9 = index.shl %a, %b + // CHECK: llvm.ashr + %10 = index.shrs %a, %b + // CHECK: llvm.lshr + %11 = index.shru %a, %b // CHECK: llvm.mlir.constant(true - %9 = index.bool.constant true + %12 = index.bool.constant true return } diff --git a/mlir/test/Conversion/MathToFuncs/math-to-funcs.mlir b/mlir/test/Conversion/MathToFuncs/math-to-funcs.mlir index af0f49254a055..8bc7f7f52ce57 100644 --- a/mlir/test/Conversion/MathToFuncs/math-to-funcs.mlir +++ b/mlir/test/Conversion/MathToFuncs/math-to-funcs.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="convert-math-to-funcs" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(convert-math-to-funcs)" | FileCheck %s // ----- diff --git a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir index 415ba1d9f001c..8c7f031cb97d9 
100644 --- a/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir +++ b/mlir/test/Conversion/MathToLLVM/math-to-llvm.mlir @@ -1,14 +1,14 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(convert-math-to-llvm)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(convert-math-to-llvm))" | FileCheck %s // CHECK-LABEL: @ops func.func @ops(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: i32, %arg4: f64) { - // CHECK: = "llvm.intr.exp"(%{{.*}}) : (f32) -> f32 + // CHECK: = llvm.intr.exp(%{{.*}}) : (f32) -> f32 %0 = math.exp %arg0 : f32 - // CHECK: = "llvm.intr.exp2"(%{{.*}}) : (f32) -> f32 + // CHECK: = llvm.intr.exp2(%{{.*}}) : (f32) -> f32 %1 = math.exp2 %arg0 : f32 - // CHECK: = "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + // CHECK: = llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 %2 = math.sqrt %arg0 : f32 - // CHECK: = "llvm.intr.sqrt"(%{{.*}}) : (f64) -> f64 + // CHECK: = llvm.intr.sqrt(%{{.*}}) : (f64) -> f64 %3 = math.sqrt %arg4 : f64 func.return } @@ -29,19 +29,31 @@ func.func @absi(%arg0: i32) -> i32 { func.func @log1p(%arg0 : f32) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %arg0 : f32 - // CHECK: %[[LOG:.*]] = "llvm.intr.log"(%[[ADD]]) : (f32) -> f32 + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) : (f32) -> f32 %0 = math.log1p %arg0 : f32 func.return } // ----- +// CHECK-LABEL: func @log1p_fmf( +// CHECK-SAME: f32 +func.func @log1p_fmf(%arg0 : f32) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 + // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %arg0 {fastmathFlags = #llvm.fastmath} : f32 + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %0 = math.log1p %arg0 fastmath : f32 + func.return +} + +// ----- + // CHECK-LABEL: func @log1p_2dvector( func.func @log1p_2dvector(%arg0 : vector<4x3xf32>) { // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x 
vector<3xf32>> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : vector<3xf32> // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %[[EXTRACT]] : vector<3xf32> - // CHECK: %[[LOG:.*]] = "llvm.intr.log"(%[[ADD]]) : (vector<3xf32>) -> vector<3xf32> + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) : (vector<3xf32>) -> vector<3xf32> // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[LOG]], %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> %0 = math.log1p %arg0 : vector<4x3xf32> func.return @@ -49,11 +61,24 @@ func.func @log1p_2dvector(%arg0 : vector<4x3xf32>) { // ----- +// CHECK-LABEL: func @log1p_2dvector_fmf( +func.func @log1p_2dvector_fmf(%arg0 : vector<4x3xf32>) { + // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : vector<3xf32> + // CHECK: %[[ADD:.*]] = llvm.fadd %[[ONE]], %[[EXTRACT]] {fastmathFlags = #llvm.fastmath} : vector<3xf32> + // CHECK: %[[LOG:.*]] = llvm.intr.log(%[[ADD]]) {fastmathFlags = #llvm.fastmath} : (vector<3xf32>) -> vector<3xf32> + // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[LOG]], %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> + %0 = math.log1p %arg0 fastmath : vector<4x3xf32> + func.return +} + +// ----- + // CHECK-LABEL: func @expm1( // CHECK-SAME: f32 func.func @expm1(%arg0 : f32) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 - // CHECK: %[[EXP:.*]] = "llvm.intr.exp"(%arg0) : (f32) -> f32 + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%arg0) : (f32) -> f32 // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : f32 %0 = math.expm1 %arg0 : f32 func.return @@ -61,11 +86,47 @@ func.func @expm1(%arg0 : f32) { // ----- +// CHECK-LABEL: func @expm1_fmf( +// CHECK-SAME: f32 +func.func @expm1_fmf(%arg0 : f32) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + // CHECK: 
%[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] {fastmathFlags = #llvm.fastmath} : f32 + %0 = math.expm1 %arg0 fastmath : f32 + func.return +} + +// ----- + +// CHECK-LABEL: func @expm1_vector( +// CHECK-SAME: vector<4xf32> +func.func @expm1_vector(%arg0 : vector<4xf32>) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%arg0) : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : vector<4xf32> + %0 = math.expm1 %arg0 : vector<4xf32> + func.return +} + +// ----- + +// CHECK-LABEL: func @expm1_vector_fmf( +// CHECK-SAME: vector<4xf32> +func.func @expm1_vector_fmf(%arg0 : vector<4xf32>) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%arg0) {fastmathFlags = #llvm.fastmath} : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[SUB:.*]] = llvm.fsub %[[EXP]], %[[ONE]] {fastmathFlags = #llvm.fastmath} : vector<4xf32> + %0 = math.expm1 %arg0 fastmath : vector<4xf32> + func.return +} + +// ----- + // CHECK-LABEL: func @rsqrt( // CHECK-SAME: f32 func.func @rsqrt(%arg0 : f32) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (f32) -> f32 + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) : (f32) -> f32 // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : f32 %0 = math.rsqrt %arg0 : f32 func.return @@ -76,7 +137,7 @@ func.func @rsqrt(%arg0 : f32) { // CHECK-LABEL: func @sine( // CHECK-SAME: f32 func.func @sine(%arg0 : f32) { - // CHECK: "llvm.intr.sin"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.sin(%arg0) : (f32) -> f32 %0 = math.sin %arg0 : f32 func.return } @@ -119,7 +180,7 @@ func.func @cttz_vec(%arg0 : vector<4xi32>) { // CHECK-LABEL: func @ctpop( // CHECK-SAME: i32 func.func @ctpop(%arg0 : i32) { - // CHECK: "llvm.intr.ctpop"(%arg0) : (i32) -> i32 + // CHECK: llvm.intr.ctpop(%arg0) : (i32) 
-> i32 %0 = math.ctpop %arg0 : i32 func.return } @@ -129,7 +190,7 @@ func.func @ctpop(%arg0 : i32) { // CHECK-LABEL: func @ctpop_vector( // CHECK-SAME: vector<3xi32> func.func @ctpop_vector(%arg0 : vector<3xi32>) { - // CHECK: "llvm.intr.ctpop"(%arg0) : (vector<3xi32>) -> vector<3xi32> + // CHECK: llvm.intr.ctpop(%arg0) : (vector<3xi32>) -> vector<3xi32> %0 = math.ctpop %arg0 : vector<3xi32> func.return } @@ -140,7 +201,7 @@ func.func @ctpop_vector(%arg0 : vector<3xi32>) { // CHECK-SAME: f64 func.func @rsqrt_double(%arg0 : f64) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f64) : f64 - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (f64) -> f64 + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) : (f64) -> f64 // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : f64 %0 = math.rsqrt %arg0 : f64 func.return @@ -148,11 +209,23 @@ func.func @rsqrt_double(%arg0 : f64) { // ----- +// CHECK-LABEL: func @rsqrt_double_fmf( +// CHECK-SAME: f64 +func.func @rsqrt_double_fmf(%arg0 : f64) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f64) : f64 + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) {fastmathFlags = #llvm.fastmath} : (f64) -> f64 + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] {fastmathFlags = #llvm.fastmath} : f64 + %0 = math.rsqrt %arg0 fastmath : f64 + func.return +} + +// ----- + // CHECK-LABEL: func @rsqrt_vector( // CHECK-SAME: vector<4xf32> func.func @rsqrt_vector(%arg0 : vector<4xf32>) { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%arg0) : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) : (vector<4xf32>) -> vector<4xf32> // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<4xf32> %0 = math.rsqrt %arg0 : vector<4xf32> func.return @@ -160,11 +233,23 @@ func.func @rsqrt_vector(%arg0 : vector<4xf32>) { // ----- +// CHECK-LABEL: func @rsqrt_vector_fmf( +// CHECK-SAME: vector<4xf32> 
+func.func @rsqrt_vector_fmf(%arg0 : vector<4xf32>) { + // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%arg0) {fastmathFlags = #llvm.fastmath} : (vector<4xf32>) -> vector<4xf32> + // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] {fastmathFlags = #llvm.fastmath} : vector<4xf32> + %0 = math.rsqrt %arg0 fastmath : vector<4xf32> + func.return +} + +// ----- + // CHECK-LABEL: func @rsqrt_multidim_vector( func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) { // CHECK: %[[EXTRACT:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> // CHECK: %[[ONE:.*]] = llvm.mlir.constant(dense<1.000000e+00> : vector<3xf32>) : vector<3xf32> - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%[[EXTRACT]]) : (vector<3xf32>) -> vector<3xf32> + // CHECK: %[[SQRT:.*]] = llvm.intr.sqrt(%[[EXTRACT]]) : (vector<3xf32>) -> vector<3xf32> // CHECK: %[[DIV:.*]] = llvm.fdiv %[[ONE]], %[[SQRT]] : vector<3xf32> // CHECK: %[[INSERT:.*]] = llvm.insertvalue %[[DIV]], %{{.*}}[0] : !llvm.array<4 x vector<3xf32>> %0 = math.rsqrt %arg0 : vector<4x3xf32> @@ -176,7 +261,7 @@ func.func @rsqrt_multidim_vector(%arg0 : vector<4x3xf32>) { // CHECK-LABEL: func @powf( // CHECK-SAME: f64 func.func @powf(%arg0 : f64) { - // CHECK: %[[POWF:.*]] = "llvm.intr.pow"(%arg0, %arg0) : (f64, f64) -> f64 + // CHECK: %[[POWF:.*]] = llvm.intr.pow(%arg0, %arg0) : (f64, f64) -> f64 %0 = math.powf %arg0, %arg0 : f64 func.return } @@ -186,7 +271,7 @@ func.func @powf(%arg0 : f64) { // CHECK-LABEL: func @round( // CHECK-SAME: f32 func.func @round(%arg0 : f32) { - // CHECK: "llvm.intr.round"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.round(%arg0) : (f32) -> f32 %0 = math.round %arg0 : f32 func.return } @@ -196,7 +281,7 @@ func.func @round(%arg0 : f32) { // CHECK-LABEL: func @roundeven( // CHECK-SAME: f32 func.func @roundeven(%arg0 : f32) { - // CHECK: "llvm.intr.roundeven"(%arg0) : (f32) -> f32 + // CHECK: 
llvm.intr.roundeven(%arg0) : (f32) -> f32 %0 = math.roundeven %arg0 : f32 func.return } @@ -206,7 +291,23 @@ func.func @roundeven(%arg0 : f32) { // CHECK-LABEL: func @trunc( // CHECK-SAME: f32 func.func @trunc(%arg0 : f32) { - // CHECK: "llvm.intr.trunc"(%arg0) : (f32) -> f32 + // CHECK: llvm.intr.trunc(%arg0) : (f32) -> f32 %0 = math.trunc %arg0 : f32 func.return } + +// ----- + +// CHECK-LABEL: func @fastmath( +// CHECK-SAME: f32 +func.func @fastmath(%arg0 : f32, %arg1 : vector<4xf32>) { + // CHECK: llvm.intr.trunc(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %0 = math.trunc %arg0 fastmath : f32 + // CHECK: llvm.intr.pow(%arg0, %arg0) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + %1 = math.powf %arg0, %arg0 fastmath : f32 + // CHECK: llvm.intr.sqrt(%arg0) : (f32) -> f32 + %2 = math.sqrt %arg0 fastmath : f32 + // CHECK: llvm.intr.fma(%arg0, %arg0, %arg0) {fastmathFlags = #llvm.fastmath} : (f32, f32, f32) -> f32 + %3 = math.fma %arg0, %arg0, %arg0 fastmath : f32 + func.return +} diff --git a/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir b/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir index df39036277ee6..a29b18b6812b9 100644 --- a/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir +++ b/mlir/test/Conversion/MathToSPIRV/math-to-gl-spirv.mlir @@ -148,7 +148,7 @@ func.func @powf_scalar(%lhs: f32, %rhs: f32) -> f32 { // CHECK-LABEL: @powf_vector func.func @powf_vector(%lhs: vector<4xf32>, %rhs: vector<4xf32>) -> vector<4xf32> { // CHECK: spirv.FOrdLessThan - // CHEKC: spirv.GL.FAbs + // CHECK: spirv.GL.FAbs // CHECK: spirv.GL.Pow %{{.*}}: vector<4xf32> // CHECK: spirv.FNegate // CHECK: spirv.Select diff --git a/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir b/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir index 3d98dbcf8f29d..5aa844c5afc5e 100644 --- a/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/generic-functions.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt 
-pass-pipeline="convert-memref-to-llvm{use-generic-functions=1}" -split-input-file %s \ +// RUN: mlir-opt -pass-pipeline="builtin.module(convert-memref-to-llvm{use-generic-functions=1})" -split-input-file %s \ // RUN: | FileCheck %s --check-prefix="CHECK-NOTALIGNED" -// RUN: mlir-opt -pass-pipeline="convert-memref-to-llvm{use-generic-functions=1 use-aligned-alloc=1}" -split-input-file %s \ +// RUN: mlir-opt -pass-pipeline="builtin.module(convert-memref-to-llvm{use-generic-functions=1 use-aligned-alloc=1})" -split-input-file %s \ // RUN: | FileCheck %s --check-prefix="CHECK-ALIGNED" // CHECK-LABEL: func @alloc() diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir index 12fd3f21e3da0..344e06db09b62 100644 --- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir @@ -1169,6 +1169,12 @@ func.func @extract_aligned_pointer_as_index(%m: memref) -> index { // CHECK-SAME: %[[ARG:.*]]: memref // CHECK: %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : memref> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[BASE:.*]] = llvm.extractvalue %[[MEM_DESC]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[ALIGNED_BASE:.*]] = llvm.extractvalue %[[MEM_DESC]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)> +// CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BASE]], %[[DESC]][0] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[ALIGNED_BASE]], %[[DESC0]][1] : !llvm.struct<(ptr, ptr, i64)> +// CHECK: %[[OFF0:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[BASE_BUFFER_DESC:.*]] = llvm.insertvalue %[[OFF0]], %[[DESC1]][2] : !llvm.struct<(ptr, ptr, i64)> // CHECK: %[[OFFSET:.*]] = llvm.extractvalue %[[MEM_DESC]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, 
array<2 x i64>)> // CHECK: %[[SIZE0:.*]] = llvm.extractvalue %[[MEM_DESC]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[SIZE1:.*]] = llvm.extractvalue %[[MEM_DESC]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 0a9f8d5611903..c95b2fca9dffd 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -313,3 +313,119 @@ func.func @async_cp_zfill( return } + +// ----- + +// CHECK-LABEL: func @mma_sp_sync_f16_16832( +func.func @mma_sp_sync_f16_16832(%arg0: vector<4x2xf16>, + %arg1: vector<4x2xf16>, + %arg2: vector<2x2xf16>, + %arg3: vector<2xi16>) -> vector<2x2xf16> { + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[3] : !llvm.array<4 x vector<2xf16>> + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.array<4 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[3] : !llvm.array<4 x vector<2xf16>> + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> + + // CHECK-NOT llvm.extractvalue + + // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 + // CHECK: %[[sparseSelector:.+]] = llvm.mlir.constant(0 : i32) : i32 + + // CHECK: %[[d:.+]] = llvm.inline_asm has_side_effects asm_dialect = att + // CHECK-SAME: "mma.sp.sync.aligned.m16n8k32.row.col.f16.f16.f16.f16 {$0,$1},{$2,$3,$4,$5},{$6,$7,$8,$9},{$10,$11},$12,$13;" + // CHECK-SAME: 
"=r,=r,r,r,r,r,r,r,r,r,r,r,r,r" + // CHECK-SAME: %[[sparseMetadata]], %[[sparseSelector]] : + // CHECK-SAME: -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 32]} : + (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + + // CHECK-DAG: llvm.extractvalue %[[d]][0] : !llvm.struct<(vector<2xf16>, vector<2xf16>)> + // CHECK-DAG: llvm.extractvalue %[[d]][1] : !llvm.struct<(vector<2xf16>, vector<2xf16>)> + // CHECK: llvm.mlir.undef : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.insertvalue %{{.+}}, %{{.+}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.insertvalue %{{.+}}, %{{.+}}[1] : !llvm.array<2 x vector<2xf16>> + return %d : vector<2x2xf16> +} + +// ----- + +// CHECK-LABEL: func @mma_sp_sync_f16_16816( +func.func @mma_sp_sync_f16_16816(%arg0: vector<2x2xf16>, + %arg1: vector<2x2xf16>, + %arg2: vector<2x2xf16>, + %arg3: vector<2xi16>) -> vector<2x2xf16> { + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> + + // CHECK-NOT llvm.extractvalue + + // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 + // CHECK: %[[sparseSelector:.+]] = llvm.mlir.constant(0 : i32) : i32 + + // CHECK: %[[d:.+]] = llvm.inline_asm has_side_effects asm_dialect = att + // CHECK-SAME: "mma.sp.sync.aligned.m16n8k16.row.col.f16.f16.f16.f16 {$0,$1},{$2,$3},{$4,$5},{$6,$7},$8,$9;" + // CHECK-SAME: "=r,=r,r,r,r,r,r,r,r,r" + // CHECK-SAME: %[[sparseMetadata]], %[[sparseSelector]] : + // CHECK-SAME: -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + + %d = 
nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 16]} : + (vector<2x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + return %d : vector<2x2xf16> +} + +// ----- + +// CHECK-LABEL: func @mma_sp_sync_i8_16864( +func.func @mma_sp_sync_i8_16864(%arg0: vector<4x4xi8>, + %arg1: vector<4x4xi8>, + %arg2: vector<2x2xi32>, + %arg3: vector<2xi16>) -> vector<2x2xi32> { + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + // CHECK: llvm.extractvalue %{{.*}}[2] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + // CHECK: llvm.extractvalue %{{.*}}[3] : !llvm.array<4 x vector<4xi8>> + + + // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<4 x vector<4xi8>> + // CHECK: llvm.bitcast %{{.+}} : vector<4xi8> to i32 + + // CHECK: llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> + // CHECK: llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> + + // CHECK-NOT llvm.extractvalue + + // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 + // CHECK: %[[sparseSelector:.+]] = llvm.mlir.constant(0 : i32) : i32 + + // CHECK: %[[d:.+]] = llvm.inline_asm has_side_effects asm_dialect = att + // CHECK-SAME: "mma.sp.sync.aligned.m16n8k64.row.col.satfinite.s32.s8.s8.s32 + // CHECK-SAME: "=r,=r,=r,=r,r,r,r,r,r,r,r,r,r,r,r,r,r,r" + // CHECK-SAME: %[[sparseMetadata]], %[[sparseSelector]] : + // CHECK-SAME: -> !llvm.struct<(i32, i32, i32, i32) + + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 64]} : + (vector<4x4xi8>, vector<4x4xi8>, vector<2x2xi32>) -> vector<2x2xi32> + return %d : vector<2x2xi32> +} diff 
--git a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir index aaa063d9fc4b0..a058365a104a1 100644 --- a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir +++ b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=0 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-THREADS %s -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=0})" %s | FileCheck --check-prefix=CHECK-BLOCKS %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=0 gpu-thread-dims=1}))" %s | FileCheck --check-prefix=CHECK-THREADS %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=0}))" %s | FileCheck --check-prefix=CHECK-BLOCKS %s // CHECK-THREADS-LABEL: @one_d_loop // CHECK-BLOCKS-LABEL: @one_d_loop diff --git a/mlir/test/Conversion/SCFToGPU/step_one.mlir b/mlir/test/Conversion/SCFToGPU/step_one.mlir index c4668bfeba747..be6fadfbd0ad3 100644 --- a/mlir/test/Conversion/SCFToGPU/step_one.mlir +++ b/mlir/test/Conversion/SCFToGPU/step_one.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck --check-prefix=CHECK-11 %s -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2})" %s | FileCheck --check-prefix=CHECK-22 %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1}))" %s | FileCheck --check-prefix=CHECK-11 %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2}))" %s | FileCheck --check-prefix=CHECK-22 %s // CHECK-11-LABEL: @step_1 // CHECK-22-LABEL: @step_1 diff --git 
a/mlir/test/Conversion/SCFToGPU/step_positive.mlir b/mlir/test/Conversion/SCFToGPU/step_positive.mlir index 65d16046382d6..97fd7d598621b 100644 --- a/mlir/test/Conversion/SCFToGPU/step_positive.mlir +++ b/mlir/test/Conversion/SCFToGPU/step_positive.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1})" %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1}))" %s | FileCheck %s // CHECK-LABEL: @step_var func.func @step_var(%A : memref, %B : memref) { diff --git a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir index 7c25cf579f0dc..a0afe0dafcaa2 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/bitwise-ops-to-llvm.mlir @@ -6,14 +6,14 @@ // CHECK-LABEL: @bitcount_scalar spirv.func @bitcount_scalar(%arg0: i16) "None" { - // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (i16) -> i16 + // CHECK: llvm.intr.ctpop(%{{.*}}) : (i16) -> i16 %0 = spirv.BitCount %arg0: i16 spirv.Return } // CHECK-LABEL: @bitcount_vector spirv.func @bitcount_vector(%arg0: vector<3xi32>) "None" { - // CHECK: "llvm.intr.ctpop"(%{{.*}}) : (vector<3xi32>) -> vector<3xi32> + // CHECK: llvm.intr.ctpop(%{{.*}}) : (vector<3xi32>) -> vector<3xi32> %0 = spirv.BitCount %arg0: vector<3xi32> spirv.Return } @@ -24,14 +24,14 @@ spirv.func @bitcount_vector(%arg0: vector<3xi32>) "None" { // CHECK-LABEL: @bitreverse_scalar spirv.func @bitreverse_scalar(%arg0: i64) "None" { - // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (i64) -> i64 + // CHECK: llvm.intr.bitreverse(%{{.*}}) : (i64) -> i64 %0 = spirv.BitReverse %arg0: i64 spirv.Return } // CHECK-LABEL: @bitreverse_vector spirv.func @bitreverse_vector(%arg0: vector<4xi32>) "None" { - // CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (vector<4xi32>) -> vector<4xi32> + // CHECK: 
llvm.intr.bitreverse(%{{.*}}) : (vector<4xi32>) -> vector<4xi32> %0 = spirv.BitReverse %arg0: vector<4xi32> spirv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/gl-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/gl-ops-to-llvm.mlir index dffa7dd0f392b..e1936e2fd8abe 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/gl-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/gl-ops-to-llvm.mlir @@ -6,9 +6,9 @@ // CHECK-LABEL: @ceil spirv.func @ceil(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.ceil"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.ceil(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Ceil %arg0 : f32 - // CHECK: "llvm.intr.ceil"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.ceil(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Ceil %arg1 : vector<3xf16> spirv.Return } @@ -19,9 +19,9 @@ spirv.func @ceil(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @cos spirv.func @cos(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.cos"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.cos(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Cos %arg0 : f32 - // CHECK: "llvm.intr.cos"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.cos(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Cos %arg1 : vector<3xf16> spirv.Return } @@ -32,9 +32,9 @@ spirv.func @cos(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @exp spirv.func @exp(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.exp"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.exp(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Exp %arg0 : f32 - // CHECK: "llvm.intr.exp"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.exp(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Exp %arg1 : vector<3xf16> spirv.Return } @@ -45,9 +45,9 @@ spirv.func @exp(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @fabs spirv.func @fabs(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: 
"llvm.intr.fabs"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.fabs(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.FAbs %arg0 : f32 - // CHECK: "llvm.intr.fabs"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.fabs(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.FAbs %arg1 : vector<3xf16> spirv.Return } @@ -58,9 +58,9 @@ spirv.func @fabs(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @floor spirv.func @floor(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.floor"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.floor(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Floor %arg0 : f32 - // CHECK: "llvm.intr.floor"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.floor(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Floor %arg1 : vector<3xf16> spirv.Return } @@ -71,9 +71,9 @@ spirv.func @floor(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @fmax spirv.func @fmax(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + // CHECK: llvm.intr.maxnum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %0 = spirv.GL.FMax %arg0, %arg0 : f32 - // CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.maxnum(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.FMax %arg1, %arg1 : vector<3xf16> spirv.Return } @@ -84,9 +84,9 @@ spirv.func @fmax(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @fmin spirv.func @fmin(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + // CHECK: llvm.intr.minnum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %0 = spirv.GL.FMin %arg0, %arg0 : f32 - // CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.minnum(%{{.*}}, %{{.*}}) : (vector<3xf16>, vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.FMin %arg1, %arg1 : vector<3xf16> 
spirv.Return } @@ -97,9 +97,9 @@ spirv.func @fmin(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @log spirv.func @log(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.log"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.log(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Log %arg0 : f32 - // CHECK: "llvm.intr.log"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.log(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Log %arg1 : vector<3xf16> spirv.Return } @@ -110,9 +110,9 @@ spirv.func @log(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @sin spirv.func @sin(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.sin"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.sin(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Sin %arg0 : f32 - // CHECK: "llvm.intr.sin"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.sin(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Sin %arg1 : vector<3xf16> spirv.Return } @@ -123,9 +123,9 @@ spirv.func @sin(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @smax spirv.func @smax(%arg0: i16, %arg1: vector<3xi32>) "None" { - // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 + // CHECK: llvm.intr.smax(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 %0 = spirv.GL.SMax %arg0, %arg0 : i16 - // CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> + // CHECK: llvm.intr.smax(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> %1 = spirv.GL.SMax %arg1, %arg1 : vector<3xi32> spirv.Return } @@ -136,9 +136,9 @@ spirv.func @smax(%arg0: i16, %arg1: vector<3xi32>) "None" { // CHECK-LABEL: @smin spirv.func @smin(%arg0: i16, %arg1: vector<3xi32>) "None" { - // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 + // CHECK: llvm.intr.smin(%{{.*}}, %{{.*}}) : (i16, i16) -> i16 %0 = spirv.GL.SMin %arg0, %arg0 : i16 - // CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> 
vector<3xi32> + // CHECK: llvm.intr.smin(%{{.*}}, %{{.*}}) : (vector<3xi32>, vector<3xi32>) -> vector<3xi32> %1 = spirv.GL.SMin %arg1, %arg1 : vector<3xi32> spirv.Return } @@ -149,9 +149,9 @@ spirv.func @smin(%arg0: i16, %arg1: vector<3xi32>) "None" { // CHECK-LABEL: @sqrt spirv.func @sqrt(%arg0: f32, %arg1: vector<3xf16>) "None" { - // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + // CHECK: llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 %0 = spirv.GL.Sqrt %arg0 : f32 - // CHECK: "llvm.intr.sqrt"(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> + // CHECK: llvm.intr.sqrt(%{{.*}}) : (vector<3xf16>) -> vector<3xf16> %1 = spirv.GL.Sqrt %arg1 : vector<3xf16> spirv.Return } @@ -162,8 +162,8 @@ spirv.func @sqrt(%arg0: f32, %arg1: vector<3xf16>) "None" { // CHECK-LABEL: @tan spirv.func @tan(%arg0: f32) "None" { - // CHECK: %[[SIN:.*]] = "llvm.intr.sin"(%{{.*}}) : (f32) -> f32 - // CHECK: %[[COS:.*]] = "llvm.intr.cos"(%{{.*}}) : (f32) -> f32 + // CHECK: %[[SIN:.*]] = llvm.intr.sin(%{{.*}}) : (f32) -> f32 + // CHECK: %[[COS:.*]] = llvm.intr.cos(%{{.*}}) : (f32) -> f32 // CHECK: llvm.fdiv %[[SIN]], %[[COS]] : f32 %0 = spirv.GL.Tan %arg0 : f32 spirv.Return @@ -177,7 +177,7 @@ spirv.func @tan(%arg0: f32) "None" { spirv.func @tanh(%arg0: f32) "None" { // CHECK: %[[TWO:.*]] = llvm.mlir.constant(2.000000e+00 : f32) : f32 // CHECK: %[[X2:.*]] = llvm.fmul %[[TWO]], %{{.*}} : f32 - // CHECK: %[[EXP:.*]] = "llvm.intr.exp"(%[[X2]]) : (f32) -> f32 + // CHECK: %[[EXP:.*]] = llvm.intr.exp(%[[X2]]) : (f32) -> f32 // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 // CHECK: %[[T0:.*]] = llvm.fsub %[[EXP]], %[[ONE]] : f32 // CHECK: %[[T1:.*]] = llvm.fadd %[[EXP]], %[[ONE]] : f32 @@ -193,7 +193,7 @@ spirv.func @tanh(%arg0: f32) "None" { // CHECK-LABEL: @inverse_sqrt spirv.func @inverse_sqrt(%arg0: f32) "None" { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1.000000e+00 : f32) : f32 - // CHECK: %[[SQRT:.*]] = "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + // CHECK: %[[SQRT:.*]] = 
llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 // CHECK: llvm.fdiv %[[ONE]], %[[SQRT]] : f32 %0 = spirv.GL.InverseSqrt %arg0 : f32 spirv.Return diff --git a/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir b/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir index 0c6aca5e6a5d7..7fa37b869473c 100644 --- a/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir +++ b/mlir/test/Conversion/ShapeToStandard/convert-shape-constraints.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-shape-constraints)" <%s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-shape-constraints))" <%s | FileCheck %s // There's not very much useful to check here other than pasting the output. // CHECK-LABEL: func @cstr_broadcastable( diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir index 811bf28535e55..df22ee18639fc 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg-named)" %s -verify-diagnostics -o -| FileCheck %s +// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg-named))" %s -verify-diagnostics -o -| FileCheck %s // CHECK-LABEL: @matmul func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir index d1506d0c304c0..70722c893ca42 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg)" %s -o -| FileCheck %s +// RUN: mlir-opt --split-input-file 
-pass-pipeline="builtin.module(func.func(tosa-to-linalg))" %s -o -| FileCheck %s // CHECK: #map = affine_map<(d0, d1, d2, d3) -> (d0, d3)> // CHECK: #map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index 6c2626dd1ac00..2aeb7c8607719 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file -pass-pipeline="func.func(tosa-to-linalg)" %s -verify-diagnostics -o -| FileCheck %s +// RUN: mlir-opt --split-input-file -pass-pipeline="builtin.module(func.func(tosa-to-linalg))" %s -verify-diagnostics -o -| FileCheck %s // CHECK: #[[$MAP0:.*]] = affine_map<() -> ()> diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir index ae6329c22eff7..6e6d384f82105 100644 --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir +++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops-mma-sync.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(convert-vector-to-gpu{use-nvgpu=true})" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(convert-vector-to-gpu{use-nvgpu=true}))" | FileCheck %s //######################################################### // INT8 row-row-row @@ -664,4 +664,4 @@ func.func @m16n8k8_tf32_f32_col_col_row(%arg0: memref<20x20xf32, 3>, %arg1: memr // CHECK: vector.store vector.transfer_write %D, %arg2[%c16, %c8] {in_bounds = [true, true]} : vector<16x8xf32>, memref<20x20xf32> return -} \ No newline at end of file +} diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir index b5d08e0817100..afe3d5d229b9c 100644 --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir 
+++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-gpu)" -canonicalize | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-gpu),canonicalize)" | FileCheck %s #map0 = affine_map<(d0, d1) -> (d1, d0)> #map1 = affine_map<(d0, d1, d2) -> (d0, d2)> diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index ed4d398780e16..0a4732aecf0fc 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -404,14 +404,14 @@ func.func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: v // CHECK: %[[T6Insert:.*]] = llvm.insertelement %[[T5]] // CHECK: %[[T6:.*]] = llvm.shufflevector %[[T6Insert]] // CHECK: %[[T8:.*]] = llvm.extractvalue %[[T7]][0] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T9:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T8]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> +// CHECK: %[[T9:.*]] = llvm.intr.fmuladd(%[[T6]], %[[B]], %[[T8]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> // CHECK: %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][0] : !llvm.array<2 x vector<3xf32>> // CHECK: %[[T12:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[T13:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T12]] : i64] : vector<2xf32> // CHECK: %[[T14Insert:.*]] = llvm.insertelement %[[T13]] // CHECK: %[[T14:.*]] = llvm.shufflevector %[[T14Insert]] // CHECK: %[[T16:.*]] = llvm.extractvalue %[[T7]][1] : !llvm.array<2 x vector<3xf32>> -// CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T14]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> +// CHECK: %[[T17:.*]] = llvm.intr.fmuladd(%[[T14]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> // CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], 
%[[T11]][1] : !llvm.array<2 x vector<3xf32>> // CHECK: %[[T19:.*]] = builtin.unrealized_conversion_cast %[[T18]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> // CHECK: return %[[T19]] : vector<2x3xf32> @@ -1103,29 +1103,29 @@ func.func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>, %c: vector<1x1x1xf // CHECK-SAME: %[[B:.*]]: vector<2x4xf32> // CHECK-SAME: %[[C:.*]]: vector<1x1x1xf32> // CHECK: %[[BL:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>> - // CHECK: "llvm.intr.fmuladd" + // CHECK: llvm.intr.fmuladd // CHECK-SAME: (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> %0 = vector.fma %a, %a, %a : vector<8xf32> // CHECK: %[[b00:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b01:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b02:.*]] = llvm.extractvalue %[[BL]][0] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[B0:.*]] = "llvm.intr.fmuladd"(%[[b00]], %[[b01]], %[[b02]]) : + // CHECK: %[[B0:.*]] = llvm.intr.fmuladd(%[[b00]], %[[b01]], %[[b02]]) : // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> // CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b10:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b11:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> // CHECK: %[[b12:.*]] = llvm.extractvalue %[[BL]][1] : !llvm.array<2 x vector<4xf32>> - // CHECK: %[[B1:.*]] = "llvm.intr.fmuladd"(%[[b10]], %[[b11]], %[[b12]]) : + // CHECK: %[[B1:.*]] = llvm.intr.fmuladd(%[[b10]], %[[b11]], %[[b12]]) : // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32> // CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>> %1 = vector.fma %b, %b, %b : vector<2x4xf32> - // CHECK: %[[C0:.*]] = "llvm.intr.fmuladd" + // CHECK: %[[C0:.*]] = llvm.intr.fmuladd // CHECK-SAME: (vector<1xf32>, 
vector<1xf32>, vector<1xf32>) -> vector<1xf32> %2 = vector.fma %c, %c, %c : vector<1x1x1xf32> - // CHECK: %[[D0:.*]] = "llvm.intr.fmuladd" + // CHECK: %[[D0:.*]] = llvm.intr.fmuladd // CHECK-SAME: (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32> %3 = vector.fma %d, %d, %d : vector diff --git a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir index 1a91e8b9fbc30..dac8e018f845f 100644 --- a/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir +++ b/mlir/test/Conversion/VectorToSCF/tensor-transfer-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-tensors=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-tensors=true}))" -split-input-file -allow-unregistered-dialect | FileCheck %s // CHECK-LABEL: func @transfer_read_2d( // CHECK: %[[ALLOC:.*]] = memref.alloca() : memref> diff --git a/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir index 4da63a90c5c1a..f8da970b7fc29 100644 --- a/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir +++ b/mlir/test/Conversion/VectorToSCF/unrolled-tensor-transfer-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-tensors=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true lower-tensors=true}))" -split-input-file -allow-unregistered-dialect | FileCheck %s // CHECK-LABEL: func @transfer_read_2d( // CHECK: %[[V_INIT:.*]] = arith.constant dense<-4.200000e+01> : vector<4x9xf32> diff --git a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir index 
7cf8cc1b05d41..3817f78f5cdd5 100644 --- a/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/unrolled-vector-to-loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true}))" -split-input-file -allow-unregistered-dialect | FileCheck %s // CHECK-LABEL: func @transfer_read_inbounds func.func @transfer_read_inbounds(%A : memref) -> (vector<2x3x4xf32>) { diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir index d4b5b34a96bad..8468b813e8f25 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf-mask-and-permutation-map.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true})" -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-permutation-maps=true}))" -split-input-file | FileCheck %s // Ensure that the permutation map is lowered (by inserting a transpose op) // before lowering the vector.transfer_read. 
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir index e7b72eb1364a8..0d5678117dfb4 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf)" -split-input-file -allow-unregistered-dialect | FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true})" -split-input-file -allow-unregistered-dialect | FileCheck %s --check-prefix=FULL-UNROLL +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf))" -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true}))" -split-input-file -allow-unregistered-dialect | FileCheck %s --check-prefix=FULL-UNROLL // CHECK-LABEL: func @vector_transfer_ops_0d( func.func @vector_transfer_ops_0d(%M: memref) { diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir index 5f4b2e90705c2..a4118904d0fe3 100644 --- a/mlir/test/Dialect/Affine/canonicalize.mlir +++ b/mlir/test/Dialect/Affine/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s // ----- diff --git a/mlir/test/Dialect/Affine/loop-tiling.mlir b/mlir/test/Dialect/Affine/loop-tiling.mlir index b84ffe10867a9..e6c33fd9292fb 100644 --- a/mlir/test/Dialect/Affine/loop-tiling.mlir +++ b/mlir/test/Dialect/Affine/loop-tiling.mlir @@ -133,8 +133,8 @@ func.func @tile_with_symbolic_loop_upper_bounds(%arg0: memref, %arg1: m // CHECK: memref.dim %{{.*}}, %c0 : memref // CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} step 32 { // 
CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} step 32 { -// CHECK-NEXT: affine.for %{{.*}} = #map(%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { -// CHECK-NEXT: affine.for %{{.*}} = #map(%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { +// CHECK-NEXT: affine.for %{{.*}} = #[[$MAP:.*]](%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { +// CHECK-NEXT: affine.for %{{.*}} = #[[$MAP]](%{{.*}}) to min [[$UBMAP]](%{{.*}})[%{{.*}}] { // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref // CHECK-NEXT: affine.for %{{.*}} = 0 to %{{.*}} { // CHECK-NEXT: affine.load diff --git a/mlir/test/Dialect/Affine/loop-unswitch.mlir b/mlir/test/Dialect/Affine/loop-unswitch.mlir index 19c1eed705c53..5a58941937bf5 100644 --- a/mlir/test/Dialect/Affine/loop-unswitch.mlir +++ b/mlir/test/Dialect/Affine/loop-unswitch.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(test-affine-loop-unswitch)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(test-affine-loop-unswitch))" | FileCheck %s // CHECK-DAG: #[[$SET:.*]] = affine_set<(d0) : (d0 - 2 >= 0)> diff --git a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir index 50cb32076e61e..cce1946b391e7 100644 --- a/mlir/test/Dialect/Affine/memref-stride-calculation.mlir +++ b/mlir/test/Dialect/Affine/memref-stride-calculation.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-memref-stride-calculation)" -o /dev/null | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-memref-stride-calculation))" -o /dev/null | FileCheck %s func.func @f(%0: index) { // CHECK-LABEL: Testing: f diff --git a/mlir/test/Dialect/Affine/simplify-structures.mlir b/mlir/test/Dialect/Affine/simplify-structures.mlir index 903d11ea865fe..2c693ea1551c0 100644 --- a/mlir/test/Dialect/Affine/simplify-structures.mlir +++ b/mlir/test/Dialect/Affine/simplify-structures.mlir @@ 
-557,3 +557,13 @@ func.func @semiaffine_modulo(%arg0: index) -> index { // CHECK: affine.apply #[[$MAP]]()[%{{.*}}] return %a : index } + +// ----- + +// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0, s1, s2] -> (s2 mod 2 + (s1 floordiv 2) * 2 + ((s2 floordiv 2) * s0) * 2)> +// CHECK-LABEL: func @semiaffine_modulo_dim +func.func @semiaffine_modulo_dim(%arg0: index, %arg1: index, %arg2: index) -> index { + %a = affine.apply affine_map<(d0)[s0, s1] -> (((d0 floordiv 2) * s0 + s1 floordiv 2) * 2 + d0 mod 2)> (%arg0)[%arg1, %arg2] + //CHECK: affine.apply #[[$MAP]]()[%{{.*}}, %{{.*}}, %{{.*}}] + return %a : index +} diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index 337eec00f3bf9..336324ef4eec9 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -162,7 +162,7 @@ func.func @cmpi_const_right(%arg0: i64) // ----- -// CHECK-LABEL: @cmpOfExtSI +// CHECK-LABEL: @cmpOfExtSI( // CHECK-NEXT: return %arg0 func.func @cmpOfExtSI(%arg0: i1) -> i1 { %ext = arith.extsi %arg0 : i1 to i64 @@ -171,7 +171,7 @@ func.func @cmpOfExtSI(%arg0: i1) -> i1 { return %res : i1 } -// CHECK-LABEL: @cmpOfExtUI +// CHECK-LABEL: @cmpOfExtUI( // CHECK-NEXT: return %arg0 func.func @cmpOfExtUI(%arg0: i1) -> i1 { %ext = arith.extui %arg0 : i1 to i64 @@ -182,6 +182,26 @@ func.func @cmpOfExtUI(%arg0: i1) -> i1 { // ----- +// CHECK-LABEL: @cmpOfExtSIVector( +// CHECK-NEXT: return %arg0 +func.func @cmpOfExtSIVector(%arg0: vector<4xi1>) -> vector<4xi1> { + %ext = arith.extsi %arg0 : vector<4xi1> to vector<4xi64> + %c0 = arith.constant dense<0> : vector<4xi64> + %res = arith.cmpi ne, %ext, %c0 : vector<4xi64> + return %res : vector<4xi1> +} + +// CHECK-LABEL: @cmpOfExtUIVector( +// CHECK-NEXT: return %arg0 +func.func @cmpOfExtUIVector(%arg0: vector<4xi1>) -> vector<4xi1> { + %ext = arith.extui %arg0 : vector<4xi1> to vector<4xi64> + %c0 = arith.constant dense<0> : vector<4xi64> + %res = arith.cmpi ne, %ext, 
%c0 : vector<4xi64> + return %res : vector<4xi1> +} + +// ----- + // CHECK-LABEL: @extSIOfExtUI // CHECK: %[[res:.+]] = arith.extui %arg0 : i1 to i64 // CHECK: return %[[res]] @@ -1660,3 +1680,5 @@ func.func @xorxor3(%a : i32, %b : i32) -> i32 { %res = arith.xori %b, %c : i32 return %res : i32 } + +// ----- diff --git a/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir b/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir deleted file mode 100644 index 5a85c3d8974e6..0000000000000 --- a/mlir/test/Dialect/Async/async-to-async-runtime-eliminate-blocking.mlir +++ /dev/null @@ -1,324 +0,0 @@ -// RUN: mlir-opt %s -split-input-file \ -// RUN: -async-to-async-runtime="eliminate-blocking-await-ops=true" \ -// RUN: | FileCheck %s --dump-input=always - -// CHECK-LABEL: func @simple_callee -// CHECK-SAME: (%[[ARG:.*]]: f32) -// CHECK-SAME: -> (!async.token, !async.value {builtin.foo = "bar"}) -func.func @simple_callee(%arg0: f32) -> (f32 {builtin.foo = "bar"}) { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]] -// CHECK ^[[ORIGINAL_ENTRY]]: -// CHECK: %[[VAL:.*]] = arith.addf %[[ARG]], %[[ARG]] : f32 - %0 = arith.addf %arg0, %arg0 : f32 -// CHECK: %[[VAL_STORAGE:.*]] = async.runtime.create : !async.value - %1 = async.runtime.create: !async.value -// CHECK: async.runtime.store %[[VAL]], %[[VAL_STORAGE]] : - async.runtime.store %0, %1: !async.value -// CHECK: async.runtime.set_available %[[VAL_STORAGE]] : !async.value - async.runtime.set_available %1: !async.value - -// CHECK: %[[SAVED:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[VAL_STORAGE]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME:.*]], ^[[CLEANUP:.*]] - %2 = async.await %1 : !async.value - 
-// CHECK: ^[[RESUME]]: -// CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[VAL_STORAGE]] : !async.value -// CHECK: cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]] - -// CHECK: ^[[BRANCH_OK]]: -// CHECK: %[[LOADED:.*]] = async.runtime.load %[[VAL_STORAGE]] : -// CHECK: %[[RETURNED:.*]] = arith.mulf %[[ARG]], %[[LOADED]] : f32 -// CHECK: async.runtime.store %[[RETURNED]], %[[RETURNED_STORAGE]] : -// CHECK: async.runtime.set_available %[[RETURNED_STORAGE]] -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - %3 = arith.mulf %arg0, %2 : f32 - return %3: f32 - -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: async.runtime.set_error %[[RETURNED_STORAGE]] -// CHECK: cf.br ^[[CLEANUP]] - - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]], %[[RETURNED_STORAGE]] : !async.token, !async.value -} - -// CHECK-LABEL: func @simple_caller() -// CHECK-SAME: -> (!async.token, !async.value) -func.func @simple_caller() -> f32 { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]] -// CHECK ^[[ORIGINAL_ENTRY]]: - -// CHECK: %[[CONSTANT:.*]] = arith.constant - %c = arith.constant 1.0 : f32 -// CHECK: %[[RETURNED_TO_CALLER:.*]]:2 = call @simple_callee(%[[CONSTANT]]) : (f32) -> (!async.token, !async.value) -// CHECK: %[[SAVED:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER]]#0, %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME:.*]], ^[[CLEANUP:.*]] - %r = call @simple_callee(%c): (f32) -> f32 - -// CHECK: ^[[RESUME]]: -// CHECK: %[[IS_TOKEN_ERROR:.*]] = 
async.runtime.is_error %[[RETURNED_TO_CALLER]]#0 : !async.token -// CHECK: cf.cond_br %[[IS_TOKEN_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK:.*]] - -// CHECK: ^[[BRANCH_TOKEN_OK]]: -// CHECK: %[[IS_VALUE_ERROR:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER]]#1 : !async.value -// CHECK: cf.cond_br %[[IS_VALUE_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK:.*]] - -// CHECK: ^[[BRANCH_VALUE_OK]]: -// CHECK: %[[LOADED:.*]] = async.runtime.load %[[RETURNED_TO_CALLER]]#1 : -// CHECK: async.runtime.store %[[LOADED]], %[[RETURNED_STORAGE]] : -// CHECK: async.runtime.set_available %[[RETURNED_STORAGE]] -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - return %r: f32 -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: async.runtime.set_error %[[RETURNED_STORAGE]] -// CHECK: cf.br ^[[CLEANUP]] - - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]], %[[RETURNED_STORAGE]] : !async.token, !async.value -} - -// CHECK-LABEL: func @double_caller() -// CHECK-SAME: -> (!async.token, !async.value) -func.func @double_caller() -> f32 { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[RETURNED_STORAGE:.*]] = async.runtime.create : !async.value -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: cf.br ^[[ORIGINAL_ENTRY:.*]] -// CHECK ^[[ORIGINAL_ENTRY]]: - -// CHECK: %[[CONSTANT:.*]] = arith.constant - %c = arith.constant 1.0 : f32 -// CHECK: %[[RETURNED_TO_CALLER_1:.*]]:2 = call @simple_callee(%[[CONSTANT]]) : (f32) -> (!async.token, !async.value) -// CHECK: %[[SAVED_1:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER_1]]#0, %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_1]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_1:.*]], ^[[CLEANUP:.*]] - %r = 
call @simple_callee(%c): (f32) -> f32 - -// CHECK: ^[[RESUME_1]]: -// CHECK: %[[IS_TOKEN_ERROR_1:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_1]]#0 : !async.token -// CHECK: cf.cond_br %[[IS_TOKEN_ERROR_1]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK_1:.*]] - -// CHECK: ^[[BRANCH_TOKEN_OK_1]]: -// CHECK: %[[IS_VALUE_ERROR_1:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_1]]#1 : !async.value -// CHECK: cf.cond_br %[[IS_VALUE_ERROR_1]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK_1:.*]] - -// CHECK: ^[[BRANCH_VALUE_OK_1]]: -// CHECK: %[[LOADED_1:.*]] = async.runtime.load %[[RETURNED_TO_CALLER_1]]#1 : -// CHECK: %[[RETURNED_TO_CALLER_2:.*]]:2 = call @simple_callee(%[[LOADED_1]]) : (f32) -> (!async.token, !async.value) -// CHECK: %[[SAVED_2:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER_2]]#0, %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_2]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_2:.*]], ^[[CLEANUP:.*]] - %s = call @simple_callee(%r): (f32) -> f32 - -// CHECK: ^[[RESUME_2]]: -// CHECK: %[[IS_TOKEN_ERROR_2:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_2]]#0 : !async.token -// CHECK: cf.cond_br %[[IS_TOKEN_ERROR_2]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_TOKEN_OK_2:.*]] - -// CHECK: ^[[BRANCH_TOKEN_OK_2]]: -// CHECK: %[[IS_VALUE_ERROR_2:.*]] = async.runtime.is_error %[[RETURNED_TO_CALLER_2]]#1 : !async.value -// CHECK: cf.cond_br %[[IS_VALUE_ERROR_2]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_VALUE_OK_2:.*]] - -// CHECK: ^[[BRANCH_VALUE_OK_2]]: -// CHECK: %[[LOADED_2:.*]] = async.runtime.load %[[RETURNED_TO_CALLER_2]]#1 : -// CHECK: async.runtime.store %[[LOADED_2]], %[[RETURNED_STORAGE]] : -// CHECK: async.runtime.set_available %[[RETURNED_STORAGE]] -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - return %s: f32 -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: async.runtime.set_error %[[RETURNED_STORAGE]] -// CHECK: cf.br ^[[CLEANUP]] - -// CHECK: 
^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]], %[[RETURNED_STORAGE]] : !async.token, !async.value -} - -// CHECK-LABEL: func @recursive -// CHECK-SAME: (%[[ARG:.*]]: !async.token) -> !async.token -func.func @recursive(%arg: !async.token) { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: %[[SAVED_1:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[ARG]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_1]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_1:.*]], ^[[CLEANUP:.*]] - - async.await %arg : !async.token -// CHECK: ^[[RESUME_1]]: -// CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token -// CHECK: cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]] - -// CHECK: ^[[BRANCH_OK]]: -// CHECK: %[[GIVEN:.*]] = async.runtime.create : !async.token -%r = async.runtime.create : !async.token -// CHECK: async.runtime.set_available %[[GIVEN]] -async.runtime.set_available %r: !async.token -// CHECK: %[[RETURNED_TO_CALLER:.*]] = call @recursive(%[[GIVEN]]) : (!async.token) -> !async.token -call @recursive(%r): (!async.token) -> () -// CHECK: %[[SAVED_2:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_2]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_2:.*]], ^[[CLEANUP:.*]] - -// CHECK: ^[[RESUME_2]]: -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] -return - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]] : 
!async.token -} - -// CHECK-LABEL: func @corecursive1 -// CHECK-SAME: (%[[ARG:.*]]: !async.token) -> !async.token -func.func @corecursive1(%arg: !async.token) { -// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: %[[SAVED_1:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[ARG]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_1]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_1:.*]], ^[[CLEANUP:.*]] - - async.await %arg : !async.token -// CHECK: ^[[RESUME_1]]: -// CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token -// CHECK: cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]] - -// CHECK: ^[[BRANCH_OK]]: -// CHECK: %[[GIVEN:.*]] = async.runtime.create : !async.token -%r = async.runtime.create : !async.token -// CHECK: async.runtime.set_available %[[GIVEN]] -async.runtime.set_available %r: !async.token -// CHECK: %[[RETURNED_TO_CALLER:.*]] = call @corecursive2(%[[GIVEN]]) : (!async.token) -> !async.token -call @corecursive2(%r): (!async.token) -> () -// CHECK: %[[SAVED_2:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_2]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_2:.*]], ^[[CLEANUP:.*]] - -// CHECK: ^[[RESUME_2]]: -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] -return - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]] : !async.token -} - -// CHECK-LABEL: func @corecursive2 -// CHECK-SAME: (%[[ARG:.*]]: !async.token) -> !async.token -func.func @corecursive2(%arg: !async.token) { -// CHECK: %[[TOKEN:.*]] = 
async.runtime.create : !async.token -// CHECK: %[[ID:.*]] = async.coro.id -// CHECK: %[[HDL:.*]] = async.coro.begin %[[ID]] -// CHECK: %[[SAVED_1:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[ARG]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_1]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_1:.*]], ^[[CLEANUP:.*]] - - async.await %arg : !async.token -// CHECK: ^[[RESUME_1]]: -// CHECK: %[[IS_ERROR:.*]] = async.runtime.is_error %[[ARG]] : !async.token -// CHECK: cf.cond_br %[[IS_ERROR]], ^[[BRANCH_ERROR:.*]], ^[[BRANCH_OK:.*]] - -// CHECK: ^[[BRANCH_OK]]: -// CHECK: %[[GIVEN:.*]] = async.runtime.create : !async.token -%r = async.runtime.create : !async.token -// CHECK: async.runtime.set_available %[[GIVEN]] -async.runtime.set_available %r: !async.token -// CHECK: %[[RETURNED_TO_CALLER:.*]] = call @corecursive1(%[[GIVEN]]) : (!async.token) -> !async.token -call @corecursive1(%r): (!async.token) -> () -// CHECK: %[[SAVED_2:.*]] = async.coro.save %[[HDL]] -// CHECK: async.runtime.await_and_resume %[[RETURNED_TO_CALLER]], %[[HDL]] -// CHECK: async.coro.suspend %[[SAVED_2]] -// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME_2:.*]], ^[[CLEANUP:.*]] - -// CHECK: ^[[RESUME_2]]: -// CHECK: async.runtime.set_available %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] - -// CHECK: ^[[BRANCH_ERROR]]: -// CHECK: async.runtime.set_error %[[TOKEN]] -// CHECK: cf.br ^[[CLEANUP]] -return - -// CHECK: ^[[CLEANUP]]: -// CHECK: async.coro.free %[[ID]], %[[HDL]] -// CHECK: cf.br ^[[SUSPEND]] - -// CHECK: ^[[SUSPEND]]: -// CHECK: async.coro.end %[[HDL]] -// CHECK: return %[[TOKEN]] : !async.token -} - -// CHECK-LABEL: func @caller_allowed_to_block -// CHECK-SAME: () -> f32 -func.func @caller_allowed_to_block() -> f32 attributes { async.allowed_to_block } { -// CHECK: %[[CONSTANT:.*]] = arith.constant - %c = arith.constant 1.0 : f32 -// CHECK: %[[RETURNED_TO_CALLER:.*]]:2 = call @simple_callee(%[[CONSTANT]]) : (f32) -> (!async.token, !async.value) -// CHECK: 
async.runtime.await %[[RETURNED_TO_CALLER]]#0 -// CHECK: async.runtime.await %[[RETURNED_TO_CALLER]]#1 -// CHECK: %[[RETURNED:.*]] = async.runtime.load %[[RETURNED_TO_CALLER]]#1 - %r = call @simple_callee(%c): (f32) -> f32 - -// CHECK: return %[[RETURNED]] : f32 - return %r: f32 -} diff --git a/mlir/test/Dialect/Async/async-to-async-runtime.mlir b/mlir/test/Dialect/Async/async-to-async-runtime.mlir index d7ebfb9e77926..1551e55c90c08 100644 --- a/mlir/test/Dialect/Async/async-to-async-runtime.mlir +++ b/mlir/test/Dialect/Async/async-to-async-runtime.mlir @@ -433,3 +433,25 @@ func.func @clone_constants(%arg0: f32, %arg1: memref<1xf32>) { // CHECK-SAME: ) -> !async.token // CHECK: %[[CST:.*]] = arith.constant 0 : index // CHECK: memref.store %[[VALUE]], %[[MEMREF]][%[[CST]]] + +// ----- +// Async Functions should be non-blocking + +// CHECK-LABEL: @async_func_await +async.func @async_func_await(%arg0: f32, %arg1: !async.value) + -> !async.token { + %0 = async.await %arg1 : !async.value + return +} +// Create token for return op, and mark a function as a coroutine. 
+// CHECK: %[[TOKEN:.*]] = async.runtime.create : !async.token +// CHECK: %[[ID:.*]] = async.coro.id +// CHECK: %[[HDL:.*]] = async.coro.begin +// CHECK: cf.br ^[[ORIGIN_ENTRY:.*]] + +// CHECK: ^[[ORIGIN_ENTRY]]: +// CHECK: %[[SAVED:.*]] = async.coro.save %[[HDL]] +// CHECK: async.runtime.await_and_resume %[[arg1:.*]], %[[HDL]] : +// CHECK-SAME: !async.value +// CHECK: async.coro.suspend %[[SAVED]] +// CHECK-SAME: ^[[SUSPEND:.*]], ^[[RESUME:.*]], ^[[CLEANUP:.*]] diff --git a/mlir/test/Dialect/Async/ops.mlir b/mlir/test/Dialect/Async/ops.mlir index c391fffd87ebc..36f40d5d074ff 100644 --- a/mlir/test/Dialect/Async/ops.mlir +++ b/mlir/test/Dialect/Async/ops.mlir @@ -136,3 +136,34 @@ func.func @create_group_and_await_all(%arg0: !async.token, %3 = arith.addi %1, %2 : index return %3 : index } + +// CHECK-LABEL: @async_func_return_token +async.func @async_func_return_token() -> !async.token { + // CHECK: return + return +} + +// CHECK-LABEL: @async_func_return_value +async.func @async_func_return_value() -> !async.value { + %0 = arith.constant 42 : i32 + // CHECK: return %[[value:.*]] : i32 + return %0 : i32 +} + +// CHECK-LABEL: @async_func_return_optional_token +async.func @async_func_return_optional_token() -> (!async.token, !async.value) { + %0 = arith.constant 42 : i32 + // CHECK: return %[[value:.*]] : i32 + return %0 : i32 +} + +// CHECK-LABEL: @async_call +func.func @async_call() { + // CHECK: async.call @async_func_return_token + // CHECK: async.call @async_func_return_value + // CHECK: async.call @async_func_return_optional_token + %0 = async.call @async_func_return_token() : () -> !async.token + %1 = async.call @async_func_return_value() : () -> !async.value + %2, %3 = async.call @async_func_return_optional_token() : () -> (!async.token, !async.value) + return +} diff --git a/mlir/test/Dialect/Async/verify.mlir b/mlir/test/Dialect/Async/verify.mlir index 7ec3528abb655..69387a77d9695 100644 --- a/mlir/test/Dialect/Async/verify.mlir +++ 
b/mlir/test/Dialect/Async/verify.mlir @@ -19,3 +19,29 @@ func.func @wrong_async_await_result_type(%arg0: !async.value) { // expected-error @+1 {{'async.await' op result type 'f64' does not match async value type 'f32'}} %0 = "async.await"(%arg0): (!async.value) -> f64 } + + +// ----- +// expected-error @+1 {{'async.func' op result is expected to be at least of size 1, but got 0}} +async.func @wrong_async_func_void_result_type(%arg0: f32) { + return +} + + +// ----- +// expected-error @+1 {{'async.func' op result type must be async value type or async token type, but got 'f32'}} +async.func @wrong_async_func_result_type(%arg0: f32) -> f32 { + return %arg0 : f32 +} + +// ----- +// expected-error @+1 {{'async.func' op results' (optional) async token type is expected to appear as the 1st return value, but got 2}} +async.func @wrong_async_func_token_type_placement(%arg0: f32) -> (!async.value, !async.token) { + return %arg0 : f32 +} + +// ----- +async.func @wrong_async_func_return_type(%arg0: f32) -> (!async.token, !async.value) { + // expected-error @+1 {{'async.return' op operand types do not match the types returned from the parent FuncOp}} + return %arg0 : f32 +} diff --git a/mlir/test/Dialect/ControlFlow/canonicalize.mlir b/mlir/test/Dialect/ControlFlow/canonicalize.mlir index 9ad790af8f499..8cef84549c8f0 100644 --- a/mlir/test/Dialect/ControlFlow/canonicalize.mlir +++ b/mlir/test/Dialect/ControlFlow/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck --dump-input-context 20 %s +// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file | FileCheck --dump-input-context 20 %s /// Test the folding of BranchOp. 
diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir index bf22add0a685b..db33f5cf4b5b0 100644 --- a/mlir/test/Dialect/GPU/promotion.mlir +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -test-gpu-memory-promotion -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(gpu.module(gpu.func(test-gpu-memory-promotion)))' -split-input-file %s | FileCheck %s gpu.module @foo { diff --git a/mlir/test/Dialect/Index/index-canonicalize.mlir b/mlir/test/Dialect/Index/index-canonicalize.mlir index f9b33f88a1a26..288593f64c3f7 100644 --- a/mlir/test/Dialect/Index/index-canonicalize.mlir +++ b/mlir/test/Dialect/Index/index-canonicalize.mlir @@ -279,6 +279,111 @@ func.func @maxu() -> index { return %0 : index } +// CHECK-LABEL: @shl +func.func @shl() -> index { + %lhs = index.constant 128 + %rhs = index.constant 2 + // CHECK: %[[A:.*]] = index.constant 512 + %0 = index.shl %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shl_32 +func.func @shl_32() -> index { + %lhs = index.constant 1 + %rhs = index.constant 32 + // CHECK: index.shl + %0 = index.shl %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shl_edge +func.func @shl_edge() -> index { + %lhs = index.constant 4000000000 + %rhs = index.constant 31 + // CHECK: %[[A:.*]] = index.constant 858{{[0-9]+}} + %0 = index.shl %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shrs +func.func @shrs() -> index { + %lhs = index.constant 128 + %rhs = index.constant 2 + // CHECK: %[[A:.*]] = index.constant 32 + %0 = index.shrs %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shrs_32 +func.func @shrs_32() -> index { + %lhs = index.constant 4000000000000 + %rhs = index.constant 32 + // CHECK: index.shrs + %0 = index.shrs %lhs, %rhs + 
return %0 : index +} + +// CHECK-LABEL: @shrs_nofold +func.func @shrs_nofold() -> index { + %lhs = index.constant 0x100000000 + %rhs = index.constant 1 + // CHECK: index.shrs + %0 = index.shrs %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shrs_edge +func.func @shrs_edge() -> index { + %lhs = index.constant 0x10000000000 + %rhs = index.constant 3 + // CHECK: %[[A:.*]] = index.constant 137{{[0-9]+}} + %0 = index.shrs %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shru +func.func @shru() -> index { + %lhs = index.constant 128 + %rhs = index.constant 2 + // CHECK: %[[A:.*]] = index.constant 32 + %0 = index.shru %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + +// CHECK-LABEL: @shru_32 +func.func @shru_32() -> index { + %lhs = index.constant 4000000000000 + %rhs = index.constant 32 + // CHECK: index.shru + %0 = index.shru %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shru_nofold +func.func @shru_nofold() -> index { + %lhs = index.constant 0x100000000 + %rhs = index.constant 1 + // CHECK: index.shru + %0 = index.shru %lhs, %rhs + return %0 : index +} + +// CHECK-LABEL: @shru_edge +func.func @shru_edge() -> index { + %lhs = index.constant 0x10000000000 + %rhs = index.constant 3 + // CHECK: %[[A:.*]] = index.constant 137{{[0-9]+}} + %0 = index.shru %lhs, %rhs + // CHECK: return %[[A]] + return %0 : index +} + // CHECK-LABEL: @cmp func.func @cmp() -> (i1, i1, i1, i1) { %a = index.constant 0 diff --git a/mlir/test/Dialect/Index/index-ops.mlir b/mlir/test/Dialect/Index/index-ops.mlir index 2176efe337309..d1a409780cd51 100644 --- a/mlir/test/Dialect/Index/index-ops.mlir +++ b/mlir/test/Dialect/Index/index-ops.mlir @@ -27,6 +27,12 @@ func.func @binary_ops(%a: index, %b: index) { %10 = index.maxs %a, %b // CHECK-NEXT: index.maxu %[[A]], %[[B]] %11 = index.maxu %a, %b + // CHECK-NEXT: index.shl %[[A]], %[[B]] + %12 = index.shl %a, %b + // CHECK-NEXT: index.shrs %[[A]], %[[B]] + %13 = index.shrs %a, %b + // CHECK-NEXT: 
index.shru %[[A]], %[[B]] + %14 = index.shru %a, %b return } diff --git a/mlir/test/Dialect/LLVMIR/call-intrin.mlir b/mlir/test/Dialect/LLVMIR/call-intrin.mlir new file mode 100644 index 0000000000000..30f5c9fb82572 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/call-intrin.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s + +// CHECK: ; ModuleID = 'LLVMDialectModule' +// CHECK: source_filename = "LLVMDialectModule" +// CHECK: declare ptr @malloc(i64) +// CHECK: declare void @free(ptr) +// CHECK: define <4 x float> @round_sse41() { +// CHECK: %1 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> , <4 x float> , i32 1) +// CHECK: ret <4 x float> %1 +// CHECK: } +llvm.func @round_sse41() -> vector<4xf32> { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(dense<0.2> : vector<4xf32>) : vector<4xf32> + %res = llvm.call_intrinsic "llvm.x86.sse41.round.ss"(%1, %1, %0) : (vector<4xf32>, vector<4xf32>, i32) -> vector<4xf32> {} + llvm.return %res: vector<4xf32> +} + +// ----- + +// CHECK: ; ModuleID = 'LLVMDialectModule' +// CHECK: source_filename = "LLVMDialectModule" + +// CHECK: declare ptr @malloc(i64) + +// CHECK: declare void @free(ptr) + +// CHECK: define float @round_overloaded() { +// CHECK: %1 = call float @llvm.round.f32(float 1.000000e+00) +// CHECK: ret float %1 +// CHECK: } +llvm.func @round_overloaded() -> f32 { + %0 = llvm.mlir.constant(1.0 : f32) : f32 + %res = llvm.call_intrinsic "llvm.round"(%0) : (f32) -> f32 {} + llvm.return %res: f32 +} + +// ----- + +// CHECK: ; ModuleID = 'LLVMDialectModule' +// CHECK: source_filename = "LLVMDialectModule" +// CHECK: declare ptr @malloc(i64) +// CHECK: declare void @free(ptr) +// CHECK: define void @lifetime_start() { +// CHECK: %1 = alloca float, i8 1, align 4 +// CHECK: call void @llvm.lifetime.start.p0(i64 4, ptr %1) +// CHECK: ret void +// CHECK: } +llvm.func @lifetime_start() { + %0 = llvm.mlir.constant(4 : i64) : i64 + %1 = 
llvm.mlir.constant(1 : i8) : i8 + %2 = llvm.alloca %1 x f32 : (i8) -> !llvm.ptr + llvm.call_intrinsic "llvm.lifetime.start"(%0, %2) : (i64, !llvm.ptr) -> () {} + llvm.return +} + +// ----- + +llvm.func @variadic() { + %0 = llvm.mlir.constant(1 : i8) : i8 + %1 = llvm.alloca %0 x f32 : (i8) -> !llvm.ptr + llvm.call_intrinsic "llvm.localescape"(%1, %1) : (!llvm.ptr, !llvm.ptr) -> () + llvm.return +} + +// ----- + +llvm.func @no_intrinsic() { + // expected-error@below {{'llvm.call_intrinsic' op couldn't find intrinsic: "llvm.does_not_exist"}} + // expected-error@below {{LLVM Translation failed for operation: llvm.call_intrinsic}} + llvm.call_intrinsic "llvm.does_not_exist"() : () -> () + llvm.return +} + +// ----- + +llvm.func @bad_types() { + %0 = llvm.mlir.constant(1 : i8) : i8 + // expected-error@below {{'llvm.call_intrinsic' op intrinsic type is not a match}} + // expected-error@below {{LLVM Translation failed for operation: llvm.call_intrinsic}} + llvm.call_intrinsic "llvm.round"(%0) : (i8) -> i8 {} + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index 17cc6bf564793..7746d5c04b811 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -104,6 +104,24 @@ module { llvm.return } + // CHECK: llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + // CHECK: llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + // CHECK: llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + // CHECK: llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + // CHECK: llvm.func @noundefattr_decl(i32 {llvm.noundef}) + llvm.func @noundefattr_decl(i32 
{llvm.noundef}) + // CHECK: llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4 : i64}) + llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4}) + // CHECK: llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + + // CHECK: llvm.func @variadic(...) llvm.func @variadic(...) diff --git a/mlir/test/Dialect/LLVMIR/optimize-for-nvvm.mlir b/mlir/test/Dialect/LLVMIR/optimize-for-nvvm.mlir index e1cfd0c44f89b..b98d2e08b7548 100644 --- a/mlir/test/Dialect/LLVMIR/optimize-for-nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/optimize-for-nvvm.mlir @@ -9,8 +9,8 @@ llvm.func @fdiv_fp16(%arg0 : f16, %arg1 : f16) -> f16 { // CHECK-DAG: %[[rcp:.*]] = nvvm.rcp.approx.ftz.f %[[rhs]] : f32 // CHECK-DAG: %[[approx:.*]] = llvm.fmul %[[lhs]], %[[rcp]] : f32 // CHECK-DAG: %[[neg:.*]] = llvm.fneg %[[rhs]] : f32 - // CHECK-DAG: %[[err:.*]] = "llvm.intr.fma"(%[[approx]], %[[neg]], %[[lhs]]) : (f32, f32, f32) -> f32 - // CHECK-DAG: %[[refined:.*]] = "llvm.intr.fma"(%[[err]], %[[rcp]], %[[approx]]) : (f32, f32, f32) -> f32 + // CHECK-DAG: %[[err:.*]] = llvm.intr.fma(%[[approx]], %[[neg]], %[[lhs]]) : (f32, f32, f32) -> f32 + // CHECK-DAG: %[[refined:.*]] = llvm.intr.fma(%[[err]], %[[rcp]], %[[approx]]) : (f32, f32, f32) -> f32 // CHECK-DAG: %[[cast:.*]] = llvm.bitcast %[[approx]] : f32 to i32 // CHECK-DAG: %[[exp:.*]] = llvm.and %[[cast]], %[[mask]] : i32 // CHECK-DAG: %[[is_zero:.*]] = llvm.icmp "eq" %[[exp]], %[[c0]] : i32 diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index 34ce89504c530..884a53dd40cd1 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -146,23 +146,23 @@ func.func @ops(%arg0: i32, %arg1: f32, // CHECK: %{{.*}} = llvm.fneg %[[FLOAT]] : f32 %29 = llvm.fneg %arg1 : f32 -// CHECK: "llvm.intr.sin"(%[[FLOAT]]) : (f32) -> f32 - %30 = "llvm.intr.sin"(%arg1) : (f32) -> f32 +// CHECK: llvm.intr.sin(%[[FLOAT]]) : (f32) -> 
f32 + %30 = llvm.intr.sin(%arg1) : (f32) -> f32 -// CHECK: "llvm.intr.pow"(%[[FLOAT]], %[[FLOAT]]) : (f32, f32) -> f32 - %31 = "llvm.intr.pow"(%arg1, %arg1) : (f32, f32) -> f32 +// CHECK: llvm.intr.pow(%[[FLOAT]], %[[FLOAT]]) : (f32, f32) -> f32 + %31 = llvm.intr.pow(%arg1, %arg1) : (f32, f32) -> f32 -// CHECK: "llvm.intr.powi"(%[[FLOAT]], %[[I32]]) : (f32, i32) -> f32 - %a31 = "llvm.intr.powi"(%arg1, %arg0) : (f32, i32) -> f32 +// CHECK: llvm.intr.powi(%[[FLOAT]], %[[I32]]) : (f32, i32) -> f32 + %a31 = llvm.intr.powi(%arg1, %arg0) : (f32, i32) -> f32 -// CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (i32) -> i32 - %32 = "llvm.intr.bitreverse"(%arg0) : (i32) -> i32 +// CHECK: llvm.intr.bitreverse(%{{.*}}) : (i32) -> i32 + %32 = llvm.intr.bitreverse(%arg0) : (i32) -> i32 -// CHECK: "llvm.intr.ctpop"(%{{.*}}) : (i32) -> i32 - %33 = "llvm.intr.ctpop"(%arg0) : (i32) -> i32 +// CHECK: llvm.intr.ctpop(%{{.*}}) : (i32) -> i32 + %33 = llvm.intr.ctpop(%arg0) : (i32) -> i32 -// CHECK: "llvm.intr.round"(%[[FLOAT]]) : (f32) -> f32 - %34 = "llvm.intr.round"(%arg1) : (f32) -> f32 +// CHECK: llvm.intr.round(%[[FLOAT]]) : (f32) -> f32 + %34 = llvm.intr.round(%arg1) : (f32) -> f32 // CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () @@ -483,6 +483,11 @@ func.func @fastmathFlags(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: vector<2 x f // CHECK: {{.*}} = llvm.fneg %arg0 : f32 %10 = llvm.fneg %arg0 {fastmathFlags = #llvm.fastmath} : f32 + +// CHECK: {{.*}} = llvm.intr.sin(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %11 = llvm.intr.sin(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 +// CHECK: {{.*}} = llvm.intr.sin(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %12 = llvm.intr.sin(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 return } diff --git a/mlir/test/Dialect/LLVMIR/terminator.mlir 
b/mlir/test/Dialect/LLVMIR/terminator.mlir index 6c2a2bf00f09e..86b70735d3efa 100644 --- a/mlir/test/Dialect/LLVMIR/terminator.mlir +++ b/mlir/test/Dialect/LLVMIR/terminator.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline='func.func(canonicalize)' %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline='builtin.module(func.func(canonicalize))' %s | FileCheck %s // verify that terminators survive the canonicalizer // CHECK-LABEL: @return diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 8f8f6000966e2..3f1118334ef5c 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -846,3 +846,27 @@ func.func @identity_mixed(%arg0 : tensor, %arg1: memref) { // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: } ins(%[[ARG1]] : tensor) // CHECK-SAME: outs(%[[ARG2]] : memref) { + +// ----- + +// Just make sure that we don't crash. + +// CHECK-LABEL: func @dedeplicate_regression_test +func.func @dedeplicate_regression_test(%0: tensor<4xf32>, %1: memref<4xf32>) { + %36 = linalg.generic + {indexing_maps = [affine_map<(d0) -> (d0)>, + affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"]} + ins(%1, %1 : memref<4xf32>, memref<4xf32>) + outs(%0 : tensor<4xf32>) { + ^bb0(%in: f32, %in_24: f32, %out: f32): + linalg.yield %in : f32 + } -> tensor<4xf32> + %53 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"]} + outs(%36 : tensor<4xf32>) { + ^bb0(%out: f32): + linalg.yield %out : f32 + } -> tensor<4xf32> + return +} diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir index 41cc866060302..a6552e0a5264e 100644 --- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir +++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt 
-pass-pipeline="func.func(convert-elementwise-to-linalg)" -split-input-file %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-elementwise-to-linalg))" -split-input-file %s | FileCheck %s // In-depth checking of the linalg.generic op for a very trivial case. // CHECK: #[[$MAP:.*]] = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir index 5450580dbf1a4..6fa84301be600 100644 --- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s #map = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir index c0cf7abb7d21b..87820d0a90a8a 100644 --- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s // TODO: Detensoring breaks if %arg0 or %arg1 are passed directly as tensors. Fix that. 
func.func @if_true_test(%arg0: i1, %arg1: i32) -> tensor attributes {} { diff --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir index 1720d6f9ece14..d11c5a5d6ce5f 100644 --- a/mlir/test/Dialect/Linalg/detensorize_if.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s #map0 = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir index fa65ae3ec9654..02fa7ace13b9d 100644 --- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s -check-prefix=DET-ALL +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s -check-prefix=DET-CF #map0 = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir index 7b70053c9266b..e10c46c629aa0 100644 --- a/mlir/test/Dialect/Linalg/detensorize_while.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF +// RUN: mlir-opt %s 
-pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s -check-prefix=DET-ALL +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s -check-prefix=DET-CF #map0 = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir index a0d3cff344add..5af2ff8ac84e6 100644 --- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize{aggressive-mode})" | FileCheck %s -check-prefix=DET-ALL -// RUN: mlir-opt %s -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s -check-prefix=DET-CF +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize{aggressive-mode}))" | FileCheck %s -check-prefix=DET-ALL +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s -check-prefix=DET-CF #map0 = affine_map<() -> ()> #map1 = affine_map<(i) -> ()> diff --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir index 59137f949b356..9e0706322ba69 100644 --- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="func.func(linalg-detensorize)" | FileCheck %s +// RUN: mlir-opt %s -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(linalg-detensorize))" | FileCheck %s #map0 = affine_map<() -> ()> diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index 12ecdda129b9f..4ff1f19fe36b5 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -1,4 +1,4 @@ -// 
RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(linalg-fold-unit-extent-dims)" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(linalg-fold-unit-extent-dims))" | FileCheck %s #accesses = [ affine_map<(i, j, k, l, m) -> (i, k, m)>, @@ -775,9 +775,9 @@ func.func @input_stays_same(%arg0 : memref>, %arg1 return %shape : memref } -// CHECK: #[[MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, 0, d2)> -// CHECK: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> ()> -// CHECK: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, 0, d2)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> ()> +// CHECK-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK: func @input_stays_same( // CHECK-SAME: %[[ARG0:.*]]: memref>, // CHECK-SAME: %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref) diff --git a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir index 3107583640206..3822fe8c39748 100644 --- a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir +++ b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline="func.func(linalg-fold-unit-extent-dims{fold-one-trip-loops-only})" | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline="builtin.module(func.func(linalg-fold-unit-extent-dims{fold-one-trip-loops-only}))" | FileCheck %s #accesses = [ affine_map<(i, j, k, l, m) -> (i, k, m)>, diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 9200c6117a493..5a1c2afdebbdd 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -673,3 +673,81 @@ func.func @transpose_input_init_rank_mismatch(%input: tensor<16x32xf32>, permutation = [1, 0, 2] func.return %transpose : tensor<32x64x16xf32> } + +// ----- + +func.func @broadcast_unsorted_dims( + %input: 
tensor<4x16xf32>, %init: tensor<4x8x16xf32>) + -> tensor<4x8x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op dimensions should be in sorted order}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<4x8x16xf32>) + dimensions = [1, 0] + func.return %bcast : tensor<4x8x16xf32> +} + +// ----- + +func.func @broadcast_input_dims_rank_mismatch( + %input: tensor<4x16xf32>, %init: tensor<4x8x16xf32>) + -> tensor<4x8x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op input rank does match the number of dimensions. expected: 2, got: 1}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<4x8x16xf32>) + dimensions = [0] + func.return %bcast : tensor<4x8x16xf32> +} + +// ----- + +func.func @broadcast_unsorted_dims( + %input: tensor<4x16xf32>, %init: tensor<4x8x16xf32>) + -> tensor<4x8x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op dimension 1 is out of range. expected range: [0, 2], got: 5}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<4x8x16xf32>) + dimensions = [0, 5] + func.return %bcast : tensor<4x8x16xf32> +} + +// ----- + +func.func @broadcast_mapped_dim_mismatch( + %input: tensor<4x16xf32>, %init: tensor<5x8x16xf32>) + -> tensor<5x8x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. 
input: 4, init: 5}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<5x8x16xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<5x8x16xf32> +} + +// ----- + +func.func @broadcast_added_dynamic_mismatch( + %input: tensor<4x16xf32>, %init: tensor<4x?x16xf32>) + -> tensor<4x?x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op init dim 1 can't be dynamic, because it's not matched to input}} + %bcast = linalg.broadcast + ins(%input:tensor<4x16xf32>) + outs(%init:tensor<4x?x16xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<4x?x16xf32> +} + +// ----- + +func.func @broadcast_size_1_extension_not_supported( + %input: tensor<1x16xf32>, %init: tensor<4x?x16xf32>) + -> tensor<4x?x16xf32> { + // expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. input: 1, init: 4}} + %bcast = linalg.broadcast + ins(%input:tensor<1x16xf32>) + outs(%init:tensor<4x?x16xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<4x?x16xf32> +} diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 58dec2be2373a..9d100d5117fdd 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -388,6 +388,19 @@ func.func @transpose(%input: tensor<16x32x64xf32>, // ----- +// CHECK-LABEL: func @broadcast +// CHECK-SAME: %[[ARG0:.*]]: memref<8x32xf32 +func.func @broadcast(%input: tensor<8x32xf32>, + %init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> { + %bcast = linalg.broadcast + ins(%input:tensor<8x32xf32>) + outs(%init:tensor<8x16x32xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<8x16x32xf32> +} + +// ----- + //===----------------------------------------------------------------------===// // AllocTensorOp elimination would produce SSA violations for the example below. 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index fc0e3e057d9a8..64c2bea1f7ee1 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -517,3 +517,53 @@ func.func @transpose_memref(%input: memref<16x32x64xf32>, func.return } // CHECK-LABEL: func @transpose_memref + +// ----- + +func.func @broadcast_static_sizes(%input: tensor<8x32xf32>, + %init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> { + %bcast = linalg.broadcast + ins(%input:tensor<8x32xf32>) + outs(%init:tensor<8x16x32xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<8x16x32xf32> +} +// CHECK-LABEL: func @broadcast_static_sizes +// CHECK: linalg.broadcast +// CHECK-NEXT: ins +// CHECK-NEXT: outs +// CHECK-NEXT: dimensions + +// ----- + +func.func @broadcast_with_dynamic_sizes( + %input: tensor<8x?xf32>, %init: tensor<8x16x?xf32>) + -> tensor<8x16x?xf32> { + %bcast = linalg.broadcast + ins(%input:tensor<8x?xf32>) + outs(%init:tensor<8x16x?xf32>) + dimensions = [0, 2] + func.return %bcast : tensor<8x16x?xf32> +} +// CHECK-LABEL: func @broadcast_with_dynamic_sizes +// CHECK: linalg.broadcast +// CHECK-NEXT: ins +// CHECK-NEXT: outs +// CHECK-NEXT: dimensions + +// ----- + +func.func @broadcast_memref(%input: memref<8x32xf32>, + %init: memref<8x16x32xf32>) { + linalg.broadcast + ins(%input:memref<8x32xf32>) + outs(%init:memref<8x16x32xf32>) + dimensions = [0, 2] + func.return +} + +// CHECK-LABEL: func @broadcast_memref +// CHECK: linalg.broadcast +// CHECK-NEXT: ins +// CHECK-NEXT: outs +// CHECK-NEXT: dimensions diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir index ee5f98bc2ce01..cb7a92198c04d 100644 --- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir @@ -112,7 
+112,7 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32> // CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)> // CHECK-LABEL: func @generic_split_3d -// CHECK-DAG: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32 +// CHECK-DAG: %[[ID:.*]] = arith.constant 0xFF800000 : f32 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32> // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32> // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32> @@ -238,7 +238,7 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32> } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %3 = arith.addf %arg0, %arg1 : f32 - %4 = arith.maxf %3, %arg2 : f32 + %4 = arith.minf %3, %arg2 : f32 linalg.yield %4 : f32 } -> tensor<5x2xf32> return %0 : tensor<5x2xf32> @@ -250,7 +250,7 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32> // CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[$MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)> // CHECK-LABEL: func @generic_split_3d -// CHECK-DAG: %[[ID:.*]] = arith.constant -3.40282347E+38 : f32 +// CHECK-DAG: %[[ID:.*]] = arith.constant 0x7F800000 : f32 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32> // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32> // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32> @@ -258,12 +258,12 @@ func.func @generic_split_3d(%input: tensor<32x2xf32>, %input_2: tensor<5x32xf32> // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", 
"reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) { // CHECK: arith.addf -// CHECK: arith.maxf +// CHECK: arith.minf // CHECK: linalg.yield // CHECK: } -> tensor<5x2x4xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) { -// CHECK: arith.maxf +// CHECK: arith.minf // CHECK: linalg.yield // CHECK: } -> tensor<5x2xf32> // CHECK: return %[[R]] : tensor<5x2xf32> diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir new file mode 100644 index 0000000000000..dad2f8476d1ff --- /dev/null +++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir @@ -0,0 +1,88 @@ +// RUN: mlir-opt %s -test-transform-dialect-interpreter -split-input-file -canonicalize | FileCheck %s + +func.func @reduction_tile(%arg0: tensor, %out: tensor) -> tensor { + %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d0)>], + iterator_types = ["parallel", "reduction"]} + ins(%arg0 : tensor) + outs(%out : tensor) { + ^bb0(%arg7: f32, %arg9: f32): + %1 = arith.mulf %arg7, %arg7 : f32 + %2 = arith.addf %1, %arg9 : f32 + linalg.yield %2 : f32 + } -> tensor + return %red : tensor +} + +transform.sequence failures(propagate) { +^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [0, 5] } +} + +// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-DAG: #[[MAP1:.*]] = affine_map<(d0, d1) -> (d0)> +// CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 5)> +// CHECK: func @reduction_tile(%[[ARG0:.+]]: tensor, %[[ARG1:.+]]: tensor +// CHECK-DAG: %[[I:.*]] = arith.constant 0.000000e+00 : 
f32 +// CHECK-DAG: %[[C5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor +// CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor +// CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor +// CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[L:.*]] = scf.for %[[K:.*]] = %[[C0]] to %[[D1]] step %[[C5]] iter_args(%[[ARG3:.*]] = %[[F]]) -> (tensor) { +// CHECK: %[[PS:.*]] = affine.min #[[MAP2]](%[[K]])[%[[D1]]] +// CHECK: %[[EXT2:.*]] = tensor.extract_slice %[[ARG0]][0, %[[K:.*]]] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor +// CHECK: %[[EXT:.*]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor +// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) outs(%[[EXT]] : tensor) { +// CHECK: arith.mulf +// CHECK: arith.addf +// CHECK: linalg.yield +// CHECK: } -> tensor +// CHECK: %[[D3:.*]] = tensor.dim %[[PR]], %[[C0]] : tensor +// CHECK: %[[D4:.*]] = tensor.dim %[[PR]], %[[C1]] : tensor +// CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor into tensor +// CHECK: scf.yield %[[INS]] : tensor +// CHECK: } +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: arith.addf +// CHECK: linalg.yield +// CHECK: } -> tensor +// CHECK: return %[[R]] : tensor + +// ----- + +func.func @reduction_tile_transpose(%arg0: tensor, %out: tensor) -> tensor { + %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, + affine_map<(d0, d1) -> (d1)>], + iterator_types = ["reduction", "parallel"]} + 
ins(%arg0 : tensor) + outs(%out : tensor) { + ^bb0(%arg7: f32, %arg9: f32): + %42 = arith.addf %arg7, %arg9 : f32 + linalg.yield %42 : f32 + } -> tensor + return %red : tensor +} + +transform.sequence failures(propagate) { +^bb0(%arg1: !pdl.operation): + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 + %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [5, 0] } +} + +// CHECK: func @reduction_tile_transpose +// CHECK: tensor.empty(%{{.*}}) : tensor<5x?xf32> +// CHECK: linalg.fill {{.*}} : tensor<5x?xf32>) -> tensor<5x?xf32> +// CHECK: scf.for +// CHECK: linalg.generic +// CHECK: %[[D3:.*]] = tensor.dim %{{.*}}, %[[C0]] : tensor +// CHECK: %[[D4:.*]] = tensor.dim %{{.*}}, %[[C1]] : tensor +// CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor into tensor<5x?xf32> +// CHECK: scf.yield {{.*}} : tensor<5x?xf32> +// CHECK: } +// CHECK: linalg.generic +// CHECK: return diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir index e7495765b3ec7..1374c996128a1 100644 --- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir @@ -61,6 +61,70 @@ func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<1x // ----- +// The i8i8i32 case is similar to f32 case, so checking one case is enough for +// test coverage. 
+func.func @conv1d_nwc_4x2x8_i8i8i32_memref(%input: memref<4x6x3xi8>, %filter: memref<1x3x8xi8>, %output: memref<4x2x8xi32>) { + linalg.conv_1d_nwc_wcf + {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} + ins(%input, %filter : memref<4x6x3xi8>, memref<1x3x8xi8>) + outs(%output : memref<4x2x8xi32>) + return +} + +// CHECK: #[[INPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK: #[[FILTER_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> +// CHECK: #[[OUTPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> + +// CHECK: func @conv1d_nwc_4x2x8_i8i8i32_memref +// CHECK-SAME: (%[[INPUT:.+]]: memref<4x6x3xi8>, %[[FILTER:.+]]: memref<1x3x8xi8>, %[[OUTPUT:.+]]: memref<4x2x8xi32>) + +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C0_I8:.+]] = arith.constant 0 : i8 +// CHECK-DAG: %[[C0_I32:.+]] = arith.constant 0 : i32 + +/// Read the whole data in one shot. +// CHECK-DAG: %[[V_INPUT_R:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[C0_I8]] +// CHECK-DAG: %[[V_FILTER_R:.+]] = vector.transfer_read %[[FILTER]][%[[C0]], %[[C0]], %[[C0]]], %[[C0_I8]] +// CHECK-DAG: %[[V_OUTPUT_R:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[C0_I32]] + +// CHECK: %[[V_INPUT_0:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xi8> to vector<4x1x3xi8> +// CHECK: %[[V_INPUT_1:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x4x3xi8> to vector<4x1x3xi8> + +// CHECK: %[[V_FILTER:.+]] = vector.extract %[[V_FILTER_R]][0] : vector<1x3x8xi8> + +// CHECK: %[[V_OUTPUT_0:.+]] = vector.extract_strided_slice %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 1, 8], strides = [1, 1, 1]} : vector<4x2x8xi32> to vector<4x1x8xi32> +// CHECK: %[[V_OUTPUT_1:.+]] = vector.extract_strided_slice 
%[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 1, 0], sizes = [4, 1, 8], strides = [1, 1, 1]} : vector<4x2x8xi32> to vector<4x1x8xi32> + +/// w == 0, kw == 0 +// CHECK: %[[CONTRACT_0:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER]], %[[V_OUTPUT_0]] +// CHECK-SAME: : vector<4x1x3xi8>, vector<3x8xi8> into vector<4x1x8xi32> + +/// w == 1, kw == 0 +// CHECK: %[[CONTRACT_1:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] +// CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER]], %[[V_OUTPUT_1]] +// CHECK-SAME: : vector<4x1x3xi8>, vector<3x8xi8> into vector<4x1x8xi32> + +/// w == 0, kw == 0 +// CHECK: %[[RES_0:.+]] = vector.insert_strided_slice %[[CONTRACT_0]], %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x8xi32> into vector<4x2x8xi32> +/// w == 1, kw == 0 +// CHECK: %[[RES_1:.+]] = vector.insert_strided_slice %[[CONTRACT_1]], %[[RES_0]] +// CHECK-SAME: {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x8xi32> into vector<4x2x8xi32> + +// Write the result back in one shot. 
+// CHECK: vector.transfer_write %[[RES_1]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] + +// ----- + func.func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<2x3x8xf32>, %output: memref<4x2x8xf32>) { linalg.conv_1d_nwc_wcf {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} diff --git a/mlir/test/Dialect/Math/algebraic-simplification.mlir b/mlir/test/Dialect/Math/algebraic-simplification.mlir index 806779ad9198d..21c9f7a8e7f17 100644 --- a/mlir/test/Dialect/Math/algebraic-simplification.mlir +++ b/mlir/test/Dialect/Math/algebraic-simplification.mlir @@ -74,6 +74,22 @@ func.func @pow_rsqrt(%arg0: f32, %arg1 : vector<4xf32>) -> (f32, vector<4xf32>) return %0, %1 : f32, vector<4xf32> } +// CHECK-LABEL: @pow_0_75 +func.func @pow_0_75(%arg0: f32, %arg1 : vector<4xf32>) -> (f32, vector<4xf32>) { + // CHECK: %[[SQRT1S:.*]] = math.sqrt %arg0 + // CHECK: %[[SQRT2S:.*]] = math.sqrt %[[SQRT1S]] + // CHECK: %[[SCALAR:.*]] = arith.mulf %[[SQRT1S]], %[[SQRT2S]] + // CHECK: %[[SQRT1V:.*]] = math.sqrt %arg1 + // CHECK: %[[SQRT2V:.*]] = math.sqrt %[[SQRT1V]] + // CHECK: %[[VECTOR:.*]] = arith.mulf %[[SQRT1V]], %[[SQRT2V]] + // CHECK: return %[[SCALAR]], %[[VECTOR]] + %c = arith.constant 0.75 : f32 + %v = arith.constant dense <0.75> : vector<4xf32> + %0 = math.powf %arg0, %c : f32 + %1 = math.powf %arg1, %v : vector<4xf32> + return %0, %1 : f32, vector<4xf32> +} + // CHECK-LABEL: @ipowi_zero_exp( // CHECK-SAME: %[[ARG0:.+]]: i32 // CHECK-SAME: %[[ARG1:.+]]: vector<4xi32> diff --git a/mlir/test/Dialect/Math/ops.mlir b/mlir/test/Dialect/Math/ops.mlir index d984cbb66f8c2..7e121f80dd79e 100644 --- a/mlir/test/Dialect/Math/ops.mlir +++ b/mlir/test/Dialect/Math/ops.mlir @@ -269,3 +269,17 @@ func.func @trunc(%f: f32, %v: vector<4xf32>, %t: tensor<4x4x?xf32>) { %2 = math.trunc %t : tensor<4x4x?xf32> return } + +// CHECK-LABEL: func @fastmath( +// CHECK-SAME: %[[F:.*]]: f32, %[[V:.*]]: vector<4xf32>, %[[T:.*]]: tensor<4x4x?xf32>) +func.func 
@fastmath(%f: f32, %v: vector<4xf32>, %t: tensor<4x4x?xf32>) { + // CHECK: %{{.*}} = math.trunc %[[F]] fastmath : f32 + %0 = math.trunc %f fastmath : f32 + // CHECK: %{{.*}} = math.powf %[[V]], %[[V]] fastmath : vector<4xf32> + %1 = math.powf %v, %v fastmath : vector<4xf32> + // CHECK: %{{.*}} = math.fma %[[T]], %[[T]], %[[T]] : tensor<4x4x?xf32> + %2 = math.fma %t, %t, %t fastmath : tensor<4x4x?xf32> + // CHECK: %{{.*}} = math.absf %[[F]] fastmath : f32 + %3 = math.absf %f fastmath : f32 + return +} diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir index 5a418022800cf..3fd4ae1c81c96 100644 --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -57,7 +57,7 @@ func.func @subview_canonicalize(%arg0 : memref, %arg1 : index, // CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] // CHECK-SAME: : memref to memref<4x1x?xf32 // CHECK: %[[RESULT:.+]] = memref.cast %[[SUBVIEW]] -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir index 393f0f49e15f7..ded7374d3ed82 100644 --- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir +++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir @@ -416,7 +416,7 @@ func.func @fold_static_stride_subview_with_affine_load_store_expand_shape_when_a // CHECK-NEXT: affine.for %[[ARG6:.*]] = 0 to 1 { // CHECK-NEXT: %[[TMP1:.*]] = affine.apply #[[$MAP0]](%[[ARG3]], %[[ARG4]], %[[ARG5]], %[[ARG6]]) // CHECK-NEXT: %[[TMP2:.*]] = affine.apply #[[$MAP1]](%[[ARG3]], %[[TMP1]]) -// CHECK-NEXT: %[[TMP3:.*]] = affine.apply #map2(%[[ARG5]], %[[ARG6]]) +// CHECK-NEXT: %[[TMP3:.*]] = affine.apply #{{.*}}(%[[ARG5]], %[[ARG6]]) // CHECK-NEXT: affine.load %[[ARG0]][%[[TMP2]], %[[TMP3]]] : memref<1024x1024xf32> // ----- diff --git a/mlir/test/Dialect/NVGPU/optimize-shared-memory.mlir 
b/mlir/test/Dialect/NVGPU/optimize-shared-memory.mlir index f4e855f56b54d..bb1108ea3115a 100644 --- a/mlir/test/Dialect/NVGPU/optimize-shared-memory.mlir +++ b/mlir/test/Dialect/NVGPU/optimize-shared-memory.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file --pass-pipeline='func.func(nvgpu-optimize-shared-memory)' | FileCheck %s +// RUN: mlir-opt %s -split-input-file --pass-pipeline='builtin.module(func.func(nvgpu-optimize-shared-memory))' | FileCheck %s // CHECK: @optimize_128x32xf16_32x128xf16([[arg0:%.+]]: memref<{{.*}}>, [[ldRow:%.+]]: index, [[ldCol:%.+]]: index, [[stRow:%.+]]: index, [[stCol:%.+]]: index, [[fragRow:%.+]]: index, [[fragCol:%.+]]: index) func.func @optimize_128x32xf16_32x128xf16(%arg0: memref<128x128xf16>, diff --git a/mlir/test/Dialect/NVGPU/roundtrip.mlir b/mlir/test/Dialect/NVGPU/roundtrip.mlir index 524f1fd6907b7..ad516b4d2c200 100644 --- a/mlir/test/Dialect/NVGPU/roundtrip.mlir +++ b/mlir/test/Dialect/NVGPU/roundtrip.mlir @@ -19,6 +19,44 @@ func.func @mma_sync(%arg0: vector<4x2xf16>, return %d : vector<2x2xf16> } +// CHECK-LABEL: func @mma_sp_sync_f16_16832( +func.func @mma_sp_sync_f16_16832(%arg0: vector<4x2xf16>, + %arg1: vector<4x2xf16>, + %arg2: vector<2x2xf16>, + %arg3: vector<2xi16>) -> vector<2x2xf16> { + // CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) { + // CHECK-SAME: mmaShape = [16, 8, 32] + // CHECK-SAME: (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 32]} : + (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + return %d : vector<2x2xf16> +} + +// CHECK-LABEL: func @mma_sp_sync_f16_16816( +func.func @mma_sp_sync_f16_16816(%arg0: vector<2x2xf16>, + %arg1: vector<2x2xf16>, + %arg2: vector<2x2xf16>, + %arg3: vector<2xi16>) -> vector<2x2xf16> { + // CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) { + // CHECK-SAME: mmaShape = [16, 8, 16] + // 
CHECK-SAME: (vector<2x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 16]} : + (vector<2x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> + return %d : vector<2x2xf16> +} + +// CHECK-LABEL: func @mma_sp_sync_i8_16864( +func.func @mma_sp_sync_i8_16864(%arg0: vector<4x4xi8>, + %arg1: vector<4x4xi8>, + %arg2: vector<2x2xi32>, + %arg3: vector<2xi16>) -> vector<2x2xi32> { + // CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) { + // CHECK-SAME: mmaShape = [16, 8, 64] + // CHECK-SAME: (vector<4x4xi8>, vector<4x4xi8>, vector<2x2xi32>) -> vector<2x2xi32> + %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 64]} : + (vector<4x4xi8>, vector<4x4xi8>, vector<2x2xi32>) -> vector<2x2xi32> + return %d : vector<2x2xi32> +} func.func @async_cp(%dst : memref<2x7x5xf32, 3>, %src : memref<4x5xf32>){ // CHECK-LABEL: func @async_cp diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index fad78b2b0ca96..ba276047a742c 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -796,12 +796,12 @@ func.func @omp_atomic_update(%x : memref, %expr : i32, %xBool : memref, } // CHECK: omp.atomic.update %[[X]] : memref // CHECK-NEXT: (%[[XVAL:.*]]: i32): - // CHECK-NEXT: %[[NEWVAL:.*]] = "llvm.intr.smax"(%[[XVAL]], %[[EXPR]]) : (i32, i32) -> i32 + // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.intr.smax(%[[XVAL]], %[[EXPR]]) : (i32, i32) -> i32 // CHECK-NEXT: omp.yield(%[[NEWVAL]] : i32) // CHECK-NEXT: } omp.atomic.update %x : memref { ^bb0(%xval: i32): - %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32 + %newval = llvm.intr.smax(%xval, %expr) : (i32, i32) -> i32 omp.yield(%newval : i32) } diff --git a/mlir/test/Dialect/Quant/canonicalize.mlir b/mlir/test/Dialect/Quant/canonicalize.mlir index fca8116d40e76..c67f1290c9d76 100644 --- a/mlir/test/Dialect/Quant/canonicalize.mlir +++ 
b/mlir/test/Dialect/Quant/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s // ----- // CHECK-LABEL: redundant_scast diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index 7dde0d7647f30..b6ac36282fc43 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file | FileCheck %s // ----- diff --git a/mlir/test/Dialect/SCF/for-loop-specialization.mlir b/mlir/test/Dialect/SCF/for-loop-specialization.mlir index 40e8d7dfe4571..ff66c6c1e47f5 100644 --- a/mlir/test/Dialect/SCF/for-loop-specialization.mlir +++ b/mlir/test/Dialect/SCF/for-loop-specialization.mlir @@ -23,7 +23,7 @@ func.func @for(%outer: index, %A: memref, %B: memref, // CHECK: [[CST_0:%.*]] = arith.constant 0 : index // CHECK: [[CST_1:%.*]] = arith.constant 1 : index // CHECK: [[DIM_0:%.*]] = memref.dim [[ARG1]], [[CST_0]] : memref -// CHECK: [[MIN:%.*]] = affine.min #map(){{\[}}[[DIM_0]], [[ARG0]]] +// CHECK: [[MIN:%.*]] = affine.min #{{.*}}(){{\[}}[[DIM_0]], [[ARG0]]] // CHECK: [[CST_1024:%.*]] = arith.constant 1024 : index // CHECK: [[PRED:%.*]] = arith.cmpi eq, [[MIN]], [[CST_1024]] : index // CHECK: scf.if [[PRED]] { diff --git a/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir b/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir index 40a12ac5afb17..d2e14f3e25fa8 100644 --- a/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir +++ b/mlir/test/Dialect/SCF/for-loop-to-while-loop.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(scf-for-to-while)' -split-input-file | FileCheck %s +// RUN: mlir-opt %s 
-pass-pipeline='builtin.module(func.func(scf-for-to-while))' -split-input-file | FileCheck %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py // CHECK-LABEL: func @single_loop( diff --git a/mlir/test/Dialect/SCF/loop-range.mlir b/mlir/test/Dialect/SCF/loop-range.mlir index 3494621fb92bc..cd3b4861fc18d 100644 --- a/mlir/test/Dialect/SCF/loop-range.mlir +++ b/mlir/test/Dialect/SCF/loop-range.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(scf-for-loop-range-folding)' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-for-loop-range-folding))' -split-input-file | FileCheck %s func.func @fold_one_loop(%arg0: memref, %arg1: index, %arg2: index) { %c0 = arith.constant 0 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir index 96befd7d57b99..aab64b2751caf 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-fusion)' -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-fusion))' -split-input-file | FileCheck %s func.func @fuse_empty_loops() { %c2 = arith.constant 2 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir index f03254405bfee..73c823ca8d55e 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir @@ -26,8 +26,8 @@ func.func @parallel_loop(%outer_i0: index, %outer_i1: index, %A: memref // CHECK: [[VAL_7:%.*]] = arith.constant 1 : index // CHECK: [[VAL_8:%.*]] = memref.dim [[VAL_2]], [[VAL_6]] : memref // CHECK: [[VAL_9:%.*]] = memref.dim [[VAL_2]], [[VAL_7]] : memref -// CHECK: [[VAL_10:%.*]] = 
affine.min #map(){{\[}}[[VAL_8]], [[VAL_0]]] -// CHECK: [[VAL_11:%.*]] = affine.min #map1(){{\[}}[[VAL_9]], [[VAL_1]]] +// CHECK: [[VAL_10:%.*]] = affine.min #{{.*}}(){{\[}}[[VAL_8]], [[VAL_0]]] +// CHECK: [[VAL_11:%.*]] = affine.min #{{.*}}(){{\[}}[[VAL_9]], [[VAL_1]]] // CHECK: [[VAL_12:%.*]] = arith.constant 1024 : index // CHECK: [[VAL_13:%.*]] = arith.cmpi eq, [[VAL_10]], [[VAL_12]] : index // CHECK: [[VAL_14:%.*]] = arith.constant 64 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir index 75cade55aef3d..7491550c1dc7c 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling-inbound-check.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4 no-min-max-bounds=true})' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4 no-min-max-bounds=true}))' -split-input-file | FileCheck %s func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir index af2b567fb7c3f..41b0d85b3752e 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4}))' -split-input-file | FileCheck %s func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, @@ -13,7 +13,7 @@ func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : 
index, return } -// CHECK: #map = affine_map<(d0, d1, d2) -> (d0, d1 - d2)> +// CHECK: #[[$MAP:.*]] = affine_map<(d0, d1, d2) -> (d0, d1 - d2)> // CHECK-LABEL: func @parallel_loop( // CHECK-SAME: [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: index, [[ARG6:%.*]]: index, [[ARG7:%.*]]: memref, [[ARG8:%.*]]: memref, [[ARG9:%.*]]: memref, [[ARG10:%.*]]: memref) { // CHECK: [[C0:%.*]] = arith.constant 0 : index @@ -22,8 +22,8 @@ func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, // CHECK: [[V1:%.*]] = arith.muli [[ARG5]], [[C1]] : index // CHECK: [[V2:%.*]] = arith.muli [[ARG6]], [[C4]] : index // CHECK: scf.parallel ([[V3:%.*]], [[V4:%.*]]) = ([[ARG1]], [[ARG2]]) to ([[ARG3]], [[ARG4]]) step ([[V1]], [[V2]]) { -// CHECK: [[V5:%.*]] = affine.min #map([[V1]], [[ARG3]], [[V3]]) -// CHECK: [[V6:%.*]] = affine.min #map([[V2]], [[ARG4]], [[V4]]) +// CHECK: [[V5:%.*]] = affine.min #[[$MAP]]([[V1]], [[ARG3]], [[V3]]) +// CHECK: [[V6:%.*]] = affine.min #[[$MAP]]([[V2]], [[ARG4]], [[V4]]) // CHECK: scf.parallel ([[V7:%.*]], [[V8:%.*]]) = ([[C0]], [[C0]]) to ([[V5]], [[V6]]) step ([[ARG5]], [[ARG6]]) { // CHECK: [[V9:%.*]] = arith.addi [[V7]], [[V3]] : index // CHECK: [[V10:%.*]] = arith.addi [[V8]], [[V4]] : index @@ -91,7 +91,7 @@ func.func @tile_nested_innermost() { // CHECK: [[V3:%.*]] = arith.muli [[C1]], [[C1_1]] : index // CHECK: [[V4:%.*]] = arith.muli [[C1]], [[C4]] : index // CHECK: scf.parallel ([[V5:%.*]], [[V6:%.*]]) = ([[C0]], [[C0]]) to ([[C2]], [[C2]]) step ([[V3]], [[V4]]) { -// CHECK: [[V7:%.*]] = affine.min #map([[V4]], [[C2]], [[V6]]) +// CHECK: [[V7:%.*]] = affine.min #{{.*}}([[V4]], [[C2]], [[V6]]) // CHECK: scf.parallel ([[V8:%.*]], [[V9:%.*]]) = ([[C0_1]], [[C0_1]]) to ([[V3]], [[V7]]) step ([[C1]], [[C1]]) { // CHECK: = arith.addi [[V8]], [[V5]] : index // CHECK: = arith.addi [[V9]], [[V6]] : index @@ -104,7 +104,7 @@ func.func @tile_nested_innermost() { // CHECK: [[V10:%.*]] = 
arith.muli [[C1]], [[C1_2]] : index // CHECK: [[V11:%.*]] = arith.muli [[C1]], [[C4_1]] : index // CHECK: scf.parallel ([[V12:%.*]], [[V13:%.*]]) = ([[C0]], [[C0]]) to ([[C2]], [[C2]]) step ([[V10]], [[V11]]) { -// CHECK: [[V14:%.*]] = affine.min #map([[V11]], [[C2]], [[V13]]) +// CHECK: [[V14:%.*]] = affine.min #{{.*}}([[V11]], [[C2]], [[V13]]) // CHECK: scf.parallel ([[V15:%.*]], [[V16:%.*]]) = ([[C0_2]], [[C0_2]]) to ([[V10]], [[V14]]) step ([[C1]], [[C1]]) { // CHECK: = arith.addi [[V15]], [[V12]] : index // CHECK: = arith.addi [[V16]], [[V13]] : index diff --git a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir index 950cfcc2c0884..e65f92e66bb47 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s //===----------------------------------------------------------------------===// // spirv.AccessChain @@ -86,6 +86,30 @@ func.func @convert_bitcast_multi_use(%arg0 : vector<2xf32>, %arg1 : !spirv.ptr i64 { + // CHECK: spirv.ReturnValue %[[ARG]] + %0 = spirv.Bitcast %arg0 : i64 to f64 + %1 = spirv.Bitcast %0 : f64 to i64 + spirv.ReturnValue %1 : i64 +} + +// ----- + +// CHECK-LABEL: @convert_bitcast_chained_roundtip +// CHECK-SAME: %[[ARG:.+]]: i64 +func.func @convert_bitcast_chained_roundtip(%arg0 : i64) -> i64 { + // CHECK: spirv.ReturnValue %[[ARG]] + %0 = spirv.Bitcast %arg0 : i64 to f64 + %1 = spirv.Bitcast %0 : f64 to vector<2xi32> + %2 = spirv.Bitcast %1 : vector<2xi32> to vector<2xf32> + %3 = spirv.Bitcast %2 : vector<2xf32> to i64 + spirv.ReturnValue %3 : i64 +} + +// ----- + //===----------------------------------------------------------------------===// // spirv.CompositeExtract 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir index e0aa98e8d050b..d1937c44262f2 100644 --- a/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir +++ b/mlir/test/Dialect/SPIRV/Transforms/inlining.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -pass-pipeline='spirv.module(inline{default-pipeline=''})' | FileCheck %s +// RUN: mlir-opt %s -split-input-file -pass-pipeline='builtin.module(spirv.module(inline{default-pipeline=''}))' | FileCheck %s spirv.module Logical GLSL450 { spirv.func @callee() "None" { diff --git a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir index 114bfd874609f..f5634524f7e66 100644 --- a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir +++ b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir @@ -92,28 +92,14 @@ func.func @sparse_push_back_inbound(%arg0: memref, %arg1: memref // CHECK: return %[[C]] // CHECK: } -// CHECK-LABEL: func.func private @_sparse_may_swap_1_i8_f32_index( -// CHECK-SAME: %[[I:arg0]]: index, -// CHECK-SAME: %[[J:.*]]: index, -// CHECK-SAME: %[[X0:.*]]: memref, -// CHECK-SAME: %[[Y0:.*]]: memref, -// CHECK-SAME: %[[Y1:.*]]: memref) { -// CHECK: %[[C:.*]] = arith.cmpi ne, %[[I]], %[[J]] -// CHECK: scf.if %[[C]] { -// CHECK: %[[Vx0i:.*]] = memref.load %[[X0]]{{\[}}%[[I]]] -// CHECK: %[[Vx0j:.*]] = memref.load %[[X0]]{{\[}}%[[J]]] -// CHECK: memref.store %[[Vx0j]], %[[X0]]{{\[}}%[[I]]] -// CHECK: memref.store %[[Vx0i]], %[[X0]]{{\[}}%[[J]]] -// CHECK: %[[Vy0i:.*]] = memref.load %[[Y0]]{{\[}}%[[I]]] -// CHECK: %[[Vy0j:.*]] = memref.load %[[Y0]]{{\[}}%[[J]]] -// CHECK: memref.store %[[Vy0j]], %[[Y0]]{{\[}}%[[I]]] -// CHECK: memref.store %[[Vy0i]], %[[Y0]]{{\[}}%[[J]]] -// CHECK: %[[Vy1i:.*]] = memref.load %[[Y1]]{{\[}}%[[I]]] -// CHECK: %[[Vy1j:.*]] = memref.load %[[Y1]]{{\[}}%[[J]]] -// CHECK: memref.store 
%[[Vy1j]], %[[Y1]]{{\[}}%[[I]]] -// CHECK: memref.store %[[Vy1i]], %[[Y1]]{{\[}}%[[J]]] -// CHECK: } -// CHECK: return +// CHECK-LABEL: func.func private @_sparse_compare_eq_1_i8( +// CHECK-SAME: %[[I:arg0]]: index, +// CHECK-SAME: %[[J:.*]]: index, +// CHECK-SAME: %[[X0:.*]]: memref) -> i1 { +// CHECK: %[[VI:.*]] = memref.load %[[X0]]{{\[}}%[[I]]] +// CHECK: %[[VJ:.*]] = memref.load %[[X0]]{{\[}}%[[J]]] +// CHECK: %[[C:.*]] = arith.cmpi eq, %[[VI]], %[[VJ]] +// CHECK: return %[[C]] // CHECK: } // CHECK-LABEL: func.func private @_sparse_partition_1_i8_f32_index( @@ -123,22 +109,27 @@ func.func @sparse_push_back_inbound(%arg0: memref, %arg1: memref // CHECK-SAME: %[[Y0:.*]]: memref, // CHECK-SAME: %[[Y1:.*]]: memref) -> index { // CHECK: %[[C1:.*]] = arith.constant 1 -// CHECK: %[[I:.*]] = arith.subi %[[L]], %[[C1]] -// CHECK: %[[Hm1:.*]] = arith.subi %[[H]], %[[C1]] -// CHECK: %[[I3:.*]] = scf.for %[[J:.*]] = %[[L]] to %[[Hm1]] step %[[C1]] iter_args(%[[I2:.*]] = %[[I]]) -> (index) { -// CHECK: %[[COND:.*]] = func.call @_sparse_less_than_1_i8(%[[J]], %[[Hm1]], %[[X0]]) -// CHECK: %[[IF:.*]] = scf.if %[[COND]] -> (index) { -// CHECK: %[[Ip1:.*]] = arith.addi %[[I2]], %[[C1]] -// CHECK: func.call @_sparse_may_swap_1_i8_f32_index(%[[Ip1]], %[[J]], %[[X0]], %[[Y0]], %[[Y1]]) -// CHECK: scf.yield %[[Ip1]] +// CHECK: %[[VAL_6:.*]] = arith.constant - +// CHECK: %[[SUM:.*]] = arith.addi %[[L]], %[[H]] +// CHECK: %[[P:.*]] = arith.shrui %[[SUM]], %[[C1]] +// CHECK: %[[J:.*]] = arith.subi %[[H]], %[[C1]] +// CHECK: %[[W:.*]]:3 = scf.while (%[[Ib:.*]] = %[[L]], %[[Jb:.*]] = %[[J]], %[[pb:.*]] = %[[P]]) : (index, index, index) -> (index, index, index) { +// CHECK: %[[Cn:.*]] = arith.cmpi ult, %[[Ib]], %[[Jb]] +// CHECK: scf.condition(%[[Cn]]) %[[Ib]], %[[Jb]], %[[pb]] +// CHECK: } do { +// CHECK: ^bb0(%[[Ia:.*]]: index, %[[Ja:.*]]: index, %[[Pa:.*]]: index): +// CHECK: %[[I2:.*]] = scf.while +// CHECK: %[[Ieq:.*]] = func.call @_sparse_compare_eq_1_i8(%[[I2:.*]], %[[Pa]], 
%[[X0]]) +// CHECK: %[[J2:.*]] = scf.while +// CHECK: %[[Jeq:.*]] = func.call @_sparse_compare_eq_1_i8(%[[J2:.*]], %[[Pa]], %[[X0]]) +// CHECK: %[[Cn2:.*]] = arith.cmpi ult, %[[I2]], %[[J2]] +// CHECK: %[[If:.*]]:3 = scf.if %[[Cn2]] -> (index, index, index) { // CHECK: } else { -// CHECK: scf.yield %[[I2]] +// CHECK: scf.yield %[[I2]], %[[J2]], %[[Pa]] // CHECK: } -// CHECK: scf.yield %[[IF:.*]] +// CHECK: scf.yield %[[If:.*]]#0, %[[If]]#1, %[[If]]#2 // CHECK: } -// CHECK: %[[I3p1:.*]] = arith.addi %[[I3:.*]], %[[C1]] : index -// CHECK: call @_sparse_may_swap_1_i8_f32_index(%[[I3p1]], %[[Hm1]], %[[X0]], %[[Y0]], %[[Y1]]) -// CHECK: return %[[I3p1]] +// CHECK: return %[[W:.*]]#2 // CHECK: } // CHECK-LABEL: func.func private @_sparse_sort_nonstable_1_i8_f32_index( @@ -181,7 +172,7 @@ func.func @sparse_sort_1d2v(%arg0: index, %arg1: memref<10xi8>, %arg2: memref, %arg3: memref, %arg4: memref) -> i1 { -// CHECK-DAG: func.func private @_sparse_may_swap_3_index(%arg0: index, %arg1: index, %arg2: memref, %arg3: memref, %arg4: memref) { +// CHECK-DAG: func.func private @_sparse_compare_eq_3_index(%arg0: index, %arg1: index, %arg2: memref, %arg3: memref, %arg4: memref) -> i1 { // CHECK-DAG: func.func private @_sparse_partition_3_index(%arg0: index, %arg1: index, %arg2: memref, %arg3: memref, %arg4: memref) -> index { // CHECK-DAG: func.func private @_sparse_sort_nonstable_3_index(%arg0: index, %arg1: index, %arg2: memref, %arg3: memref, %arg4: memref) { // CHECK-LABEL: func.func @sparse_sort_3d diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir index d67e11b92dd9c..cb1f16ef2cd20 100644 --- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir @@ -116,6 +116,7 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100 // CHECK-RWT: %[[V:.*]] = tensor.extract %[[A]]{{\[}}%[[FI]], %[[FJ]]] : tensor<2x4xf64> // 
CHECK-RWT: %[[NZ:.*]] = arith.cmpf une, %[[V]], %[[F0]] : f64 // CHECK-RWT: scf.if %[[NZ]] { +// // FIXME: the SSA chain is broken here! // CHECK-RWT: %{{.*}} = sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[FI]], %[[FJ]]] // CHECK-RWT: } // CHECK-RWT: } @@ -126,11 +127,13 @@ func.func @sparse_convert_complex(%arg0: tensor<100xcomplex>) -> tensor<100 // CHECK-RWT: %[[V2:.*]] = sparse_tensor.values %[[COO]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]], %[[I1]] jointly %[[V2]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor() -// CHECK-RWT: sparse_tensor.foreach in %[[COO]] -// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64): -// CHECK-RWT: sparse_tensor.insert %[[FV]] into %[[DST]]{{\[}}%[[FI0]], %[[FI1]]] +// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64, %[[R0:.*]]: tensor +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.insert %[[FV]] into %[[R0]]{{\[}}%[[FI0]], %[[FI1]]] +// CHECK-RWT: sparse_tensor.yield %[[RET]] // CHECK-RWT: } -// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.load %[[NEW_T]] hasInserts +// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[NT]] // CHECK-RWT: bufferization.dealloc_tensor %[[COO]] // CHECK-RWT: return %[[R]] : tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { @@ -179,6 +182,7 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { // CHECK-RWT: %[[I1r:.*]] = tensor.extract %[[SI]]{{\[}}%[[FI]], %[[C1]]] : tensor<2x2xi64> // CHECK-RWT: %[[I1:.*]] = arith.index_cast %[[I1r]] : i64 to index // CHECK-RWT: %[[V:.*]] = tensor.extract %[[SV]]{{\[}}%[[FI]]] : tensor<2xf32> +// // FIXME: the SSA chain is broken here! 
// CHECK-RWT: sparse_tensor.insert %[[V]] into %[[COO]]{{\[}}%[[I0]], %[[I1]]] // CHECK-RWT: } // CHECK-RWT: %[[TI0:.*]] = sparse_tensor.indices %[[COO]] {dimension = 0 : index} @@ -187,11 +191,13 @@ func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> { // CHECK-RWT: %[[TV:.*]] = sparse_tensor.values %[[COO]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[TI0]], %[[TI1]] jointly %[[TV]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor() -// CHECK-RWT: sparse_tensor.foreach in %[[COO]] -// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32): -// CHECK-RWT: sparse_tensor.insert %[[F2V]] into %[[DST]]{{\[}}%[[F2I0]], %[[F2I1]]] +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[F2I0:.*]]: index, %[[F2I1:.*]]: index, %[[F2V:.*]]: f32, %[[R0:.*]]: tensor +// CHECK-RWT: %[[NEW_T:.*]] = sparse_tensor.insert %[[F2V]] into %[[R0]]{{\[}}%[[F2I0]], %[[F2I1]]] +// CHECK-RWT: sparse_tensor.yield %[[NEW_T]] // CHECK-RWT: } -// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[DST]] +// CHECK-RWT: %[[T:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[R:.*]] = sparse_tensor.convert %[[T]] // CHECK-RWT: bufferization.dealloc_tensor %[[COO]] // CHECK-RWT: return %[[R]] : tensor<8x7xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{ diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir index 92f9e46b90938..17145f8d37380 100644 --- a/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2sparse.mlir @@ -94,11 +94,13 @@ func.func @sparse_convert_1d_ss(%arg0: tensor) -> tensor // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[A]] // CHECK-RWT: sparse_tensor.sort %[[NNZ]], %[[I0]] jointly %[[V]] // CHECK-RWT: %[[DST:.*]] = bufferization.alloc_tensor(%[[D]]) -// 
CHECK-RWT: sparse_tensor.foreach in %[[A]] -// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32): -// CHECK-RWT: sparse_tensor.insert %[[FV2]] into %[[DST]]{{\[}}%[[FI2]]] +// CHECK-RWT: %[[RET:.*]] = sparse_tensor.foreach in %[[A]] init(%[[DST]]) +// CHECK-RWT: ^bb0(%[[FI2:.*]]: index, %[[FV2:.*]]: f32, %[[T:.*]]: tensor> func.func @sparse_convert(%arg0: tensor) -> tensor { %0 = sparse_tensor.convert %arg0 : tensor to tensor diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir index 1ab4a66665287..02fb97bc866c6 100644 --- a/mlir/test/Dialect/SparseTensor/invalid.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid.mlir @@ -551,6 +551,51 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { // ----- +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { + // expected-error@+1 {{Unmatched element type between input tensor and block argument}} + sparse_tensor.foreach in %arg0 : tensor<2x4xf64, #DCSR> do { + ^bb0(%1: index, %2: index, %v: f32) : + } + return +} + +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in number of init arguments and results}} + sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 do { + ^bb0(%1: index, %2: index, %v: f32, %r1 : i32) : + } + return +} + +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in types of init arguments and results}} + %1 = sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 -> i32 do { + ^bb0(%1: index, %2: index, %v: f32, %r0 : f32) : + } + return +} + +// ----- + +#DCSR = 
#sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + // expected-error@+1 {{Mismatch in types of yield values and results}} + %1 = sparse_tensor.foreach in %arg0 init(%arg1) : tensor<2x4xf64, #DCSR>, f32 -> f32 do { + ^bb0(%1: index, %2: index, %v: f32, %r0 : f32) : + sparse_tensor.yield %1 : index + } + return +} + +// ----- + // TODO: a test case with empty xs doesn't work due to some parser issues. func.func @sparse_sort_x_type( %arg0: index, %arg1: memref) { @@ -577,6 +622,32 @@ func.func @sparse_sort_mismatch_x_type(%arg0: index, %arg1: memref<10xindex>, %a // ----- +func.func @sparse_sort_coo_x_type( %arg0: index, %arg1: memref) { + // expected-error@+1 {{operand #1 must be 1D memref of integer or index values}} + sparse_tensor.sort_coo %arg0, %arg1: memref + return +} + +// ----- + +func.func @sparse_sort_coo_x_too_small(%arg0: memref<50xindex>) { + %i20 = arith.constant 20 : index + // expected-error@+1 {{Expected dimension(xy) >= n * (nx + ny) got 50 < 60}} + sparse_tensor.sort_coo %i20, %arg0 {nx = 2 : index, ny = 1 : index} : memref<50xindex> + return +} + +// ----- + +func.func @sparse_sort_coo_y_too_small(%arg0: memref<60xindex>, %arg1: memref<10xf32>) { + %i20 = arith.constant 20 : index + // expected-error@+1 {{Expected dimension(y) >= n got 10 < 20}} + sparse_tensor.sort_coo %i20, %arg0 jointly %arg1 {nx = 2 : index, ny = 1 : index} : memref<60xindex> jointly memref<10xf32> + return +} + +// ----- + #CSR = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}> func.func @sparse_alloc_escapes(%arg0: index) -> tensor<10x?xf64, #CSR> { diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir index 79b616dec8304..3a6cf999df90a 100644 --- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir @@ 
-18,18 +18,19 @@ // CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) // CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]]) // CHECK: %[[VB:.*]] = memref.alloca() -// CHECK: scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] { +// CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]]) // CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]]) // CHECK: %[[E0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]] // CHECK: %[[E1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]] // CHECK: %[[V:.*]] = memref.load %[[VB]][] -// CHECK: sparse_tensor.insert %[[V]] into %[[T]]{{\[}}%[[E0]], %[[E1]]] +// CHECK: %[[T1:.*]] = sparse_tensor.insert %[[V]] into %[[A2]]{{\[}}%[[E0]], %[[E1]]] +// CHECK: scf.yield %[[T1]] // CHECK: } // CHECK: call @delSparseTensorReader(%[[R]]) -// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T]] -// CHECK: bufferization.dealloc_tensor %[[T]] +// CHECK: %[[T3:.*]] = sparse_tensor.load %[[T2]] hasInserts +// CHECK: %[[R:.*]] = sparse_tensor.convert %[[T3]] +// CHECK: bufferization.dealloc_tensor %[[T3]] // CHECK: return %[[R]] -// CHECK: } func.func @sparse_new(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor return %0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index e19a5ee833f83..bc664ae3d2d00 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s +// RUN: mlir-opt %s -split-input-file | mlir-opt -split-input-file | FileCheck %s #SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"]}> @@ -411,6 +411,26 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { return } +// ----- + +#DCSR = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}> + +// CHECK-LABEL: func @sparse_tensor_foreach( +// 
CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[A1:.*]]: f32 +// CHECK-NEXT: %[[RET:.*]] = sparse_tensor.foreach in %[[A0]] init(%[[A1]]) +// CHECK-NEXT: ^bb0(%[[TMP_1:.*]]: index, %[[TMP_2:.*]]: index, %[[TMP_v:.*]]: f64, %[[TMP_r:.*]]: f32) +// CHECK: sparse_tensor.yield %[[TMP_r]] : f32 +// CHECK: } +func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>, %arg1: f32) -> () { + %ret = sparse_tensor.foreach in %arg0 init(%arg1): tensor<2x4xf64, #DCSR>, f32 -> f32 + do { + ^bb0(%1: index, %2: index, %v: f64, %r: f32) : + sparse_tensor.yield %r : f32 + } + return +} + // ---- // CHECK-LABEL: func @sparse_sort_1d0v( @@ -464,3 +484,18 @@ func.func @sparse_sort_stable(%arg0: index, %arg1: memref<10xi8>, %arg2: memref< sparse_tensor.sort stable %arg0, %arg1, %arg2 jointly %arg3 : memref<10xi8>, memref<20xi8> jointly memref<10xf64> return %arg1, %arg2, %arg3 : memref<10xi8>, memref<20xi8>, memref<10xf64> } + +// ----- + +func.func @sparse_sort_coo(%arg0: index, %arg1: memref) -> (memref) { + sparse_tensor.sort_coo %arg0, %arg1 { nx=2 : index, ny=1 : index}: memref + return %arg1 : memref +} + +// ----- + +func.func @sparse_sort_coo_stable(%arg0: index, %arg1: memref, %arg2: memref) -> (memref, memref) { + sparse_tensor.sort_coo stable %arg0, %arg1 jointly %arg2 { nx=2 : index, ny=1 : index}: memref jointly memref + return %arg1, %arg2 : memref, memref +} + diff --git a/mlir/test/Dialect/SparseTensor/scf_1_N_conversion.mlir b/mlir/test/Dialect/SparseTensor/scf_1_N_conversion.mlir index 334d58c623936..207e46b3d45ae 100644 --- a/mlir/test/Dialect/SparseTensor/scf_1_N_conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/scf_1_N_conversion.mlir @@ -30,3 +30,68 @@ func.func @for(%in: tensor<1024xf32, #SparseVector>, return %1 : tensor<1024xf32, #SparseVector> } + +// CHECK-LABEL: func @if( +// CHECK-SAME: %[[DIM_SIZE:.*0]]: memref<1xindex>, +// CHECK-SAME: %[[DIM_CURSOR:.*1]]: memref<1xindex>, +// CHECK-SAME: 
%[[MEM_SIZE:.*2]]: memref<3xindex>, +// CHECK-SAME: %[[POINTER:.*3]]: memref, +// CHECK-SAME: %[[INDICES:.*4]]: memref, +// CHECK-SAME: %[[VALUE:.*5]]: memref, +// CHECK-SAME: %[[DIM_SIZE_1:.*6]]: memref<1xindex>, +// CHECK-SAME: %[[DIM_CURSOR_1:.*7]]: memref<1xindex>, +// CHECK-SAME: %[[MEM_SIZE_1:.*8]]: memref<3xindex>, +// CHECK-SAME: %[[POINTER_1:.*9]]: memref, +// CHECK-SAME: %[[INDICES_1:.*10]]: memref, +// CHECK-SAME: %[[VALUE_1:.*11]]: memref, +// CHECK-SAME: %[[TMP_arg12:.*12]]: i1) -> +// CHECK-SAME: (memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref) { +// CHECK: %[[SV:.*]]:6 = scf.if %[[TMP_arg12]] -> (memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref) { +// CHECK: scf.yield %[[DIM_SIZE]], %[[DIM_CURSOR]], %[[MEM_SIZE]], %[[POINTER]], %[[INDICES]], %[[VALUE]] : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +// CHECK: } else { +// CHECK: scf.yield %[[DIM_SIZE_1]], %[[DIM_CURSOR_1]], %[[MEM_SIZE_1]], %[[POINTER_1]], %[[INDICES_1]], %[[VALUE_1]] : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +// CHECK: } +// CHECK: return %[[SV]]#0, %[[SV]]#1, %[[SV]]#2, %[[SV]]#3, %[[SV]]#4, %[[SV]]#5 : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +func.func @if(%t: tensor<1024xf32, #SparseVector>, + %f: tensor<1024xf32, #SparseVector>, + %c: i1) -> tensor<1024xf32, #SparseVector> { + %1 = scf.if %c -> tensor<1024xf32, #SparseVector> { + scf.yield %t : tensor<1024xf32, #SparseVector> + } else { + scf.yield %f : tensor<1024xf32, #SparseVector> + } + + return %1 : tensor<1024xf32, #SparseVector> +} + +// CHECK-LABEL: func @while( +// CHECK-SAME: %[[DIM_SIZE:.*0]]: memref<1xindex>, +// CHECK-SAME: %[[DIM_CURSOR:.*1]]: memref<1xindex>, +// CHECK-SAME: %[[MEM_SIZE:.*2]]: memref<3xindex>, +// CHECK-SAME: %[[POINTER:.*3]]: memref, +// CHECK-SAME: %[[INDICES:.*4]]: memref, +// CHECK-SAME: %[[VALUE:.*5]]: memref, +// CHECK-SAME: %[[TMP_arg6:.*6]]: 
i1) -> +// CHECK-SAME: (memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref) { +// CHECK: %[[SV:.*]]:6 = scf.while ( +// CHECK-SAME: %[[TMP_arg7:.*]] = %[[DIM_SIZE]], +// CHECK-SAME: %[[TMP_arg8:.*]] = %[[DIM_CURSOR]], +// CHECK-SAME: %[[TMP_arg9:.*]] = %[[MEM_SIZE]], +// CHECK-SAME: %[[TMP_arg10:.*]] = %[[POINTER]], +// CHECK-SAME: %[[TMP_arg11:.*]] = %[[INDICES]], +// CHECK-SAME: %[[TMP_arg12:.*]] = %[[VALUE]]) +// CHECK: scf.condition(%[[TMP_arg6]]) %[[TMP_arg7]], %[[TMP_arg8]], %[[TMP_arg9]], %[[TMP_arg10]], %[[TMP_arg11]], %[[TMP_arg12]] : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +// CHECK: } do { +// CHECK: ^bb0(%[[TMP_arg7]]: memref<1xindex>, %[[TMP_arg8]]: memref<1xindex>, %[[TMP_arg9]]: memref<3xindex>, %[[TMP_arg10]]: memref, %[[TMP_arg11]]: memref, %[[TMP_arg12]]: memref): +// CHECK: scf.yield %[[TMP_arg7]], %[[TMP_arg8]], %[[TMP_arg9]], %[[TMP_arg10]], %[[TMP_arg11]], %[[TMP_arg12]] : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +// CHECK: } +// CHECK: return %[[SV]]#0, %[[SV]]#1, %[[SV]]#2, %[[SV]]#3, %[[SV]]#4, %[[SV]]#5 : memref<1xindex>, memref<1xindex>, memref<3xindex>, memref, memref, memref +func.func @while(%arg0: tensor<1024xf32, #SparseVector>, %c: i1) -> tensor<1024xf32, #SparseVector> { + %0 = scf.while (%arg4 = %arg0) : (tensor<1024xf32, #SparseVector>) -> tensor<1024xf32, #SparseVector> { + scf.condition(%c) %arg4 : tensor<1024xf32, #SparseVector> + } do { + ^bb0(%arg7: tensor<1024xf32, #SparseVector>): + scf.yield %arg7 : tensor<1024xf32, #SparseVector> + } + return %0: tensor<1024xf32, #SparseVector> +} diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir index 7280c6f5e7ba3..717819bd0cb16 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir @@ -19,16 +19,18 @@ // CHECK: %[[TMP_5:.*]] = 
sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor // CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] { +// CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]]) // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_2]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_3]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_3]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_1:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_1]] // CHECK: } +// CHECK: scf.yield %[[RET_4]] // CHECK: } // CHECK: %[[TMP_8:.*]] = sparse_tensor.pointers %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor // CHECK: %[[TMP_9:.*]] = sparse_tensor.indices %[[TMP_arg1]] {dimension = 0 : index} : tensor<3x4xf64, #sparse_tensor @@ -37,17 +39,19 @@ // CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor // CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step 
%[[TMP_c1]] { +// CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]]) // CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_9]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_25:.*]] = memref.load %[[TMP_10]][%[[TMP_arg3]]] : memref // CHECK-DAG: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_10]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: %[[RET_5:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A3:.*]] = %[[A2]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_2:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_2]] // CHECK: } +// CHECK: scf.yield %[[RET_5]] // CHECK: } // CHECK: %[[TMP_15:.*]] = sparse_tensor.pointers %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor // CHECK: %[[TMP_16:.*]] = sparse_tensor.indices %[[TMP_arg2]] {dimension = 0 : index} : tensor<4x4xf64, #sparse_tensor @@ -56,19 +60,22 @@ // CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor // CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref -// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] { +// CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] iter_args(%[[A4:.*]] = %[[RET_2]]) // CHECK: %[[TMP_23:.*]] = memref.load 
%[[TMP_16]][%[[TMP_arg3]]] : memref // CHECK: %[[TMP_25:.*]] = memref.load %[[TMP_17]][%[[TMP_arg3]]] : memref // CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index // CHECK: %[[TMP_26:.*]] = memref.load %[[TMP_17]][%[[TMP_24]]] : memref -// CHECK: scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] { +// CHECK: %[[RET_6:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A5:.*]] = %[[A4]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: sparse_tensor.insert %[[TMP_28]] into %[[TMP_0]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_3:.*]] = sparse_tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: scf.yield %[[NEW_3]] // CHECK: } +// CHECK: scf.yield %[[RET_6]] // CHECK: } -// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_0]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts +// CHECK: %[[TMP_22:.*]] = sparse_tensor.convert %[[TMP_23]] : tensor<9x4xf64, #sparse_tensor // CHECK: return %[[TMP_22]] : tensor<9x4xf64, #sparse_tensor func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir index 38766b08ccab8..f38865c5e2a4f 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir @@ -1,14 +1,13 @@ // RUN: mlir-opt %s -sparsification="parallelization-strategy=none" | \ // RUN: FileCheck %s --check-prefix=CHECK-PAR0 -// FIXME: we do not support vectorization/parallel loops in loop emitter right now -// R_U_N: mlir-opt %s 
-sparsification="parallelization-strategy=dense-outer-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR1 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR2 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR3 -// R_U_N: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ -// R_U_N: FileCheck %s --check-prefix=CHECK-PAR4 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-outer-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR1 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-outer-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR2 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=dense-any-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR3 +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ +// RUN: FileCheck %s --check-prefix=CHECK-PAR4 #DenseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] @@ -151,7 +150,8 @@ func.func @scale_ss(%scale: f32, // // CHECK-PAR4-LABEL: func @matvec // CHECK-PAR4: scf.parallel -// CHECK-PAR4: scf.for +// CHECK-PAR4: scf.parallel +// CHECK-PAR4: scf.reduce // CHECK-PAR4: return // func.func @matvec(%arga: tensor<16x32xf32, #CSR>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir new file mode 100644 index 0000000000000..8ba66d2c92ae1 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -0,0 +1,63 @@ +// RUN: mlir-opt %s -sparsification="parallelization-strategy=any-storage-any-loop" | \ +// RUN: FileCheck %s + +#CSR = #sparse_tensor.encoding<{ + dimLevelType = [ "dense", "compressed" ] +}> + +#trait_matvec = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // A + affine_map<(i,j) -> 
(j)>, // b + affine_map<(i,j) -> (i)> // x (out) + ], + iterator_types = ["parallel", "reduction"], + doc = "x(i) += A(i,j) * b(j)" +} +// CHECK-LABEL: func.func @matvec( +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>, +// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<32xf32>, +// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> { +// CHECK-DAG: %[[TMP_c16:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index +// CHECK: %[[TMP_0:.*]] = sparse_tensor.pointers %[[TMP_arg0]] {dimension = 1 : index} +// CHECK: %[[TMP_1:.*]] = sparse_tensor.indices %[[TMP_arg0]] {dimension = 1 : index} +// CHECK: %[[TMP_2:.*]] = sparse_tensor.values %[[TMP_arg0]] +// CHECK: %[[TMP_3:.*]] = bufferization.to_memref %[[TMP_arg1]] : memref<32xf32> +// CHECK: %[[TMP_4:.*]] = bufferization.to_memref %[[TMP_arg2]] : memref<16xf32> +// CHECK: scf.parallel (%[[TMP_arg3:.*]]) = (%[[TMP_c0]]) to (%[[TMP_c16]]) step (%[[TMP_c1]]) { +// CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> +// CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_0]][%[[TMP_arg3]]] : memref +// CHECK: %[[TMP_8:.*]] = arith.addi %[[TMP_arg3]], %[[TMP_c1]] : index +// CHECK: %[[TMP_9:.*]] = memref.load %[[TMP_0]][%[[TMP_8]]] : memref +// CHECK: %[[TMP_10:.*]] = scf.parallel (%[[TMP_arg4:.*]]) = (%[[TMP_7]]) to (%[[TMP_9]]) step (%[[TMP_c1]]) init (%[[TMP_6]]) -> f32 { +// CHECK: %[[TMP_11:.*]] = memref.load %[[TMP_1]][%[[TMP_arg4]]] : memref +// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_2]][%[[TMP_arg4]]] : memref +// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_3]][%[[TMP_11]]] : memref<32xf32> +// CHECK: %[[TMP_14:.*]] = arith.mulf %[[TMP_12]], %[[TMP_13]] : f32 +// CHECK: scf.reduce(%[[TMP_14]]) : f32 { +// CHECK: ^bb0(%[[TMP_arg5:.*]]: f32, %[[TMP_arg6:.*]]: f32): +// CHECK: %[[TMP_15:.*]] = arith.addf %[[TMP_arg5]], %[[TMP_arg6]] 
: f32 +// CHECK: scf.reduce.return %[[TMP_15]] : f32 +// CHECK: } +// CHECK: scf.yield +// CHECK: } +// CHECK: memref.store %[[TMP_10]], %[[TMP_4]][%[[TMP_arg3]]] : memref<16xf32> +// CHECK: scf.yield +// CHECK: } +// CHECK: %[[TMP_5:.*]] = bufferization.to_tensor %[[TMP_4]] : memref<16xf32> +// CHECK: return %[[TMP_5]] : tensor<16xf32> +func.func @matvec(%arga: tensor<16x32xf32, #CSR>, + %argb: tensor<32xf32>, + %argx: tensor<16xf32>) -> tensor<16xf32> { + %0 = linalg.generic #trait_matvec + ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) + outs(%argx: tensor<16xf32>) { + ^bb(%A: f32, %b: f32, %x: f32): + %0 = arith.mulf %A, %b : f32 + %1 = arith.addf %0, %x : f32 + linalg.yield %1 : f32 + } -> tensor<16xf32> + return %0 : tensor<16xf32> +} diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index c162bacffac96..94ee50197fa9c 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -52,14 +52,16 @@ // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] // CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref // CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R:.*]] = %[[B]]) // CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[DI0:.*]] = arith.divui %[[SI]], %[[C10]] : index // CHECK-RWT: %[[DI1:.*]] = arith.remui %[[SI]], %[[C10]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: scf.yield %[[NT:.*]] // CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: 
%[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor<10x10xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> // func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10xf64, #SparseMatrix> { @@ -111,25 +113,28 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor() // CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index} // CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index} -// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] -// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref -// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { -// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index -// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref -// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] { -// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index -// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]] -// CHECK-RWT } -// CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[I1:.*]] = 
sparse_tensor.indices %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] +// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref +// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[A0:.*]] = %[[B]]) +// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index +// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref +// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[A1:.*]] = %[[A0]]) +// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index +// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index +// CHECK-RWT: %[[R1:.*]] = sparse_tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[DI]]] +// CHECK-RWT scf.yield %[[R1]] +// CHECK-RWT } +// CHECK-RWT scf.yield %[[RET_1]] +// CHECK-RWT: } +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor<100xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> // func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<100xf64, #SparseVector> { @@ -191,7 +196,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] // CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref // CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step 
%[[C1]] iter_args(%[[R:.*]] = %[[B]]) // CHECK-RWT: %[[SI:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[I]]] : memref // CHECK-RWT: %[[T1:.*]] = arith.muli %[[DD0]], %[[C10]] : index @@ -200,9 +205,11 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK-RWT: %[[T3:.*]] = arith.remui %[[SI]], %[[T2]] : index // CHECK-RWT: %[[T4:.*]] = arith.divui %[[T2]], %[[C10]] : index // CHECK-RWT: %[[DI1:.*]] = arith.divui %[[T3]], %[[T4]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R]]{{\[}}%[[DI0]], %[[DI1]]] +// CHECK-RWT: scf.yield %[[NT]] // CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor> // func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor { @@ -260,28 +267,31 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK-RWT: %[[B:.*]] = bufferization.alloc_tensor(%[[DD0]]) // CHECK-RWT: %[[P0:.*]] = sparse_tensor.pointers %[[S]] {dimension = 0 : index} // CHECK-RWT: %[[I0:.*]] = sparse_tensor.indices %[[S]] {dimension = 0 : index} -// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} -// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] -// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref -// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref -// CHECK-RWT: scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] { -// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref -// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index -// 
CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref -// CHECK-RWT: scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] { -// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref -// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index -// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index -// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index -// CHECK-RWT: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index -// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index -// CHECK-RWT: sparse_tensor.insert %[[SV]] into %[[B]]{{\[}}%[[DI]]] -// CHECK-RWT } -// CHECK-RWT: } -// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[B]] +// CHECK-RWT: %[[P1:.*]] = sparse_tensor.pointers %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[I1:.*]] = sparse_tensor.indices %[[S]] {dimension = 1 : index} +// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[S]] +// CHECK-RWT: %[[S0:.*]] = memref.load %[[P0]]{{\[}}%[[C0]]] : memref +// CHECK-RWT: %[[E0:.*]] = memref.load %[[P0]]{{\[}}%[[C1]]] : memref +// CHECK-RWT: %[[RET:.*]] = scf.for %[[I:.*]] = %[[S0]] to %[[E0]] step %[[C1]] iter_args(%[[R0:.*]] = %[[B]]) +// CHECK-RWT: %[[SI0:.*]] = memref.load %[[I0]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[S1:.*]] = memref.load %[[P1]]{{\[}}%[[I]]] : memref +// CHECK-RWT-DAG: %[[PE1:.*]] = arith.addi %[[I]], %[[C1]] : index +// CHECK-RWT: %[[E1:.*]] = memref.load %[[P1]]{{\[}}%[[PE1]]] : memref +// CHECK-RWT: %[[RET_1:.*]] = scf.for %[[J:.*]] = %[[S1]] to %[[E1]] step %[[C1]] iter_args(%[[R1:.*]] = %[[R0]]) +// CHECK-RWT: %[[SI1:.*]] = memref.load %[[I1]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[SV:.*]] = memref.load %[[V]]{{\[}}%[[J]]] : memref +// CHECK-RWT: %[[T1:.*]] = arith.divui %[[DD0]], %[[C10]] : index +// CHECK-RWT: %[[T2:.*]] = arith.muli %[[SI0]], %[[T1]] : index +// CHECK-RWT: %[[T3:.*]] = arith.divui %[[T1]], %[[SD1]] : index +// CHECK-RWT: %[[T4:.*]] 
= arith.muli %[[SI1]], %[[T3]] : index +// CHECK-RWT: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index +// CHECK-RWT: %[[NT:.*]] = sparse_tensor.insert %[[SV]] into %[[R1]]{{\[}}%[[DI]]] +// CHECK-RWT scf.yield %[[NT]] +// CHECK-RWT } +// CHECK-RWT scf.yield %[[RET_1]] +// CHECK-RWT: } +// CHECK-RWT: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts +// CHECK-RWT: %[[T:.*]] = sparse_tensor.convert %[[NT1]] // CHECK-RWT: return %[[T]] : tensor> // func.func @dynamic_sparse_collapse(%arg0: tensor<10x?xf64, #SparseMatrix>) -> tensor { diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index 9cddfd88735ab..99e31c7c35964 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -353,7 +353,7 @@ func.func @slice_canonicalize(%arg0 : tensor, %arg1 : index, // CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] // CHECK-SAME: : tensor to tensor<4x1x?xf32> // CHECK: %[[RESULT:.+]] = tensor.cast %[[SLICE]] -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- @@ -372,7 +372,7 @@ func.func @rank_reducing_slice_canonicalize(%arg0 : tensor, %arg1 : i // CHECK-SAME: [4, 1, %{{[a-zA-Z0-9_]+}}] [1, 1, 1] // CHECK-SAME: : tensor to tensor<4x?xf32> // CHECK: %[[RESULT:.+]] = tensor.cast %[[SLICE]] -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- @@ -467,7 +467,7 @@ func.func @slice_to_insert_slice_canonicalize(%arg0 : tensor, %arg1 : // CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SLICE]] // CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] // CHECK-SAME: : tensor<4x1x?xf32> into tensor -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- @@ -486,7 +486,7 @@ func.func @rank_reducing_insert_slice_canonicalize(%arg0 : tensor, %arg // CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[CAST]] // CHECK-SAME: [0, %{{.+}}, 1] [4, 1, %{{.+}}] [1, 1, 1] // CHECK-SAME: : tensor<4x?xf32> into tensor -// CHEKC: return %[[RESULT]] +// CHECK: 
return %[[RESULT]] // ----- @@ -509,7 +509,7 @@ func.func @rank_reducing_slice_to_insert_slice_canonicalize(%arg0 : tensor into tensor -// CHEKC: return %[[RESULT]] +// CHECK: return %[[RESULT]] // ----- diff --git a/mlir/test/Dialect/Tensor/ops.mlir b/mlir/test/Dialect/Tensor/ops.mlir index aadf6ab90250d..4afe128d05eb3 100644 --- a/mlir/test/Dialect/Tensor/ops.mlir +++ b/mlir/test/Dialect/Tensor/ops.mlir @@ -21,6 +21,15 @@ func.func @empty(%sz: index) -> tensor<5x?x6xf32> { return %0 : tensor<5x?x6xf32> } +// CHECK-LABEL: func @empty_with_encoding( +// CHECK-SAME: %[[sz:.*]]: index +func.func @empty_with_encoding(%sz: index) -> tensor<5x?x6xf32, "foo"> { + // CHECK: tensor.empty(%[[sz]]) : tensor<5x?x6xf32, "foo"> + %0 = tensor.empty(%sz) : tensor<5x?x6xf32, "foo"> + return %0 : tensor<5x?x6xf32, "foo"> +} + + // CHECK-LABEL: func @extract( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[INDEX:.*]]: index) { diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index c3d9ae24c0891..3f3a35eb52b0d 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' -split-input-file -allow-unregistered-dialect | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file -allow-unregistered-dialect | FileCheck %s // ----- diff --git a/mlir/test/IR/affine-map.mlir b/mlir/test/IR/affine-map.mlir index 56a105630f9d2..977aec2536b1e 100644 --- a/mlir/test/IR/affine-map.mlir +++ b/mlir/test/IR/affine-map.mlir @@ -1,10 +1,7 @@ // RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s // Identity maps used in trivial compositions in MemRefs are optimized away. 
-// CHECK-NOT: #map{{[0-9]*}} = affine_map<(d0, d1) -> (d0, d1)> #map0 = affine_map<(i, j) -> (i, j)> - -// CHECK-NOT: #map{{[0-9]*}} = affine_map<(d0, d1)[s0] -> (d0, d1)> #map1 = affine_map<(i, j)[s0] -> (i, j)> // CHECK: #map{{[0-9]*}} = affine_map<() -> (0)> @@ -194,7 +191,6 @@ // Check if parser can parse affine_map with identifiers that collide with // integer types. -// CHECK: #map{{[0-9]*}} = affine_map<(d0, d1) -> (d0, d1)> #map60 = affine_map<(i0, i1) -> (i0, i1)> // Check if parser can parse affine_map with identifiers that collide with diff --git a/mlir/test/IR/diagnostic-handler-filter.mlir b/mlir/test/IR/diagnostic-handler-filter.mlir index 83ba053065943..39374a919fb95 100644 --- a/mlir/test/IR/diagnostic-handler-filter.mlir +++ b/mlir/test/IR/diagnostic-handler-filter.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-diagnostic-filter{filters=mysource1})" -split-input-file -o - 2>&1 | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-diagnostic-filter{filters=mysource1}))" -split-input-file -o - 2>&1 | FileCheck %s // This test verifies that diagnostic handler can emit the call stack successfully. // CHECK-LABEL: Test 'test1' diff --git a/mlir/test/IR/memory-ops.mlir b/mlir/test/IR/memory-ops.mlir index fbbf36d6bc210..c1cfc3bfa0dbf 100644 --- a/mlir/test/IR/memory-ops.mlir +++ b/mlir/test/IR/memory-ops.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s | FileCheck %s -// CHECK: #map = affine_map<(d0, d1)[s0] -> (d0 + s0, d1)> +// CHECK: #[[$MAP:.*]] = affine_map<(d0, d1)[s0] -> (d0 + s0, d1)> // CHECK-LABEL: func @alloc() { func.func @alloc() { @@ -17,11 +17,11 @@ func.func @alloc() { %1 = memref.alloc(%c0, %c1) : memref (d0, d1)>, 1> // Test alloc with no dynamic dimensions and one symbol. 
- // CHECK: %{{.*}} = memref.alloc()[%{{.*}}] : memref<2x4xf32, #map, 1> + // CHECK: %{{.*}} = memref.alloc()[%{{.*}}] : memref<2x4xf32, #[[$MAP]], 1> %2 = memref.alloc()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> // Test alloc with dynamic dimensions and one symbol. - // CHECK: %{{.*}} = memref.alloc(%{{.*}})[%{{.*}}] : memref<2x?xf32, #map, 1> + // CHECK: %{{.*}} = memref.alloc(%{{.*}})[%{{.*}}] : memref<2x?xf32, #[[$MAP]], 1> %3 = memref.alloc(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1> // Alloc with no mappings. @@ -48,11 +48,11 @@ func.func @alloca() { %1 = memref.alloca(%c0, %c1) : memref (d0, d1)>, 1> // Test alloca with no dynamic dimensions and one symbol. - // CHECK: %{{.*}} = memref.alloca()[%{{.*}}] : memref<2x4xf32, #map, 1> + // CHECK: %{{.*}} = memref.alloca()[%{{.*}}] : memref<2x4xf32, #[[$MAP]], 1> %2 = memref.alloca()[%c0] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> // Test alloca with dynamic dimensions and one symbol. - // CHECK: %{{.*}} = memref.alloca(%{{.*}})[%{{.*}}] : memref<2x?xf32, #map, 1> + // CHECK: %{{.*}} = memref.alloca(%{{.*}})[%{{.*}}] : memref<2x?xf32, #[[$MAP]], 1> %3 = memref.alloca(%c1)[%c0] : memref<2x?xf32, affine_map<(d0, d1)[s0] -> (d0 + s0, d1)>, 1> // Alloca with no mappings, but with alignment. 
diff --git a/mlir/test/IR/test-clone.mlir b/mlir/test/IR/test-clone.mlir index be8cef05d17ec..7c720288c95b9 100644 --- a/mlir/test/IR/test-clone.mlir +++ b/mlir/test/IR/test-clone.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(test-clone)" -split-input-file +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(test-clone))" -split-input-file module { func.func @fixpoint(%arg1 : i32) -> i32 { diff --git a/mlir/test/IR/test-matchers.mlir b/mlir/test/IR/test-matchers.mlir index 074572a591ab7..87c7bf9e7ebc8 100644 --- a/mlir/test/IR/test-matchers.mlir +++ b/mlir/test/IR/test-matchers.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline="func.func(test-matchers)" -o /dev/null 2>&1 | FileCheck %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline="builtin.module(func.func(test-matchers))" -o /dev/null 2>&1 | FileCheck %s func.func @test1(%a: f32, %b: f32, %c: f32) { %0 = arith.addf %a, %b: f32 diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir index 300e75b519092..f661792b9ab48 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(canonicalize,cse),one-shot-bufferize{bufferize-function-boundaries}" |\ -// RUN: mlir-opt -pass-pipeline="func.func(buffer-deallocation,convert-vector-to-scf,lower-affine,convert-linalg-to-loops)" |\ -// RUN: mlir-opt -pass-pipeline="func.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(canonicalize,cse),one-shot-bufferize{bufferize-function-boundaries})" |\ +// RUN: mlir-opt 
-pass-pipeline="builtin.module(func.func(buffer-deallocation,convert-vector-to-scf,lower-affine,convert-linalg-to-loops))" |\ +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(canonicalize,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext |\ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir index c12d2b9b913e4..459b0e13667f6 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir @@ -2,6 +2,14 @@ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s +// +// Do the same run, but now with parallelization. +// +// RUN: mlir-opt %s --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + #CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir index 59e7f33c22c88..adc0b261f04d3 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -4,6 +4,16 @@ // RUN: -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s +// +// Do the same run, but now with parallelization. 
+// +// RUN: mlir-opt %s \ +// RUN: --sparse-compiler="parallelization-strategy=any-storage-any-loop" | \ +// RUN: TENSOR0="%mlir_src_dir/test/Integration/data/wide.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s !Filename = !llvm.ptr diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir index 650c0885fcb66..f0937e238af58 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir @@ -82,7 +82,7 @@ module { // CHECK: ( 1, 1, 2, 5, 10 ) // CHECK: ( 3, 3, 1, 10, 1 ) // CHECK: ( 9, 9, 4, 7, 2 ) - // CHECK: ( 7, 8, 10, 9, 6 ) + // CHECK: ( 8, 7, 10, 9, 6 ) call @storeValuesTo(%x0, %c10, %c2, %c1, %c5, %c1) : (memref, i32, i32, i32, i32, i32) -> () call @storeValuesTo(%x1, %c1, %c1, %c3, %c10, %c3) diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/tools/sparse_compiler.py b/mlir/test/Integration/Dialect/SparseTensor/python/tools/sparse_compiler.py index 174e847f72c23..abdab9738def7 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/python/tools/sparse_compiler.py +++ b/mlir/test/Integration/Dialect/SparseTensor/python/tools/sparse_compiler.py @@ -13,7 +13,7 @@ class SparseCompiler: """Sparse compiler class for compiling and building MLIR modules.""" def __init__(self, options: str, opt_level: int, shared_libs: Sequence[str]): - pipeline = f'sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}}' + pipeline = f'builtin.module(sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}})' self.pipeline = pipeline self.opt_level = opt_level self.shared_libs = shared_libs diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py 
b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py index 6f117f386f531..1ba0d393894b9 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py +++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_sparse_compiler.py @@ -16,7 +16,7 @@ class SparseCompiler: """Sparse compiler class for compiling and building MLIR modules.""" def __init__(self, options: str, opt_level: int, shared_libs: Sequence[str]): - pipeline = f'sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}}' + pipeline = f'builtin.module(sparse-compiler{{{options} reassociate-fp-reductions=1 enable-index-optimizations=1}})' self.pipeline = pipeline self.opt_level = opt_level self.shared_libs = shared_libs diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir index 41dec2912f5c4..5372280e8a012 100644 --- a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir +++ b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf,memref-expand,arith-expand),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf,memref-expand,arith-expand),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir index 3d5165e797ade..973ca5bc95383 100644 --- 
a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-1d.mlir @@ -1,19 +1,19 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s 
-pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir index 17311904d8919..ee079c44f5822 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-2d.mlir @@ -1,19 +1,19 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s 
-pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir index 6bad868bba214..225ff563cbff0 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read-3d.mlir @@ -1,19 +1,19 @@ 
-// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true 
lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true lower-permutation-maps=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir index 66c7be56a12ec..ccafa5b36a7c6 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-read.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e entry -entry-point-result=void \ 
// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir index 2bdd1caf84e6b..929a30a4d4788 100644 --- a/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir +++ b/mlir/test/Integration/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -1,9 +1,9 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf,lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-scf{full-unroll=true},lower-affine,convert-scf-to-cf),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir index a34838b0ef0a0..1a3fe7cf9cefb 100644 --- 
a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir @@ -1,9 +1,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" -canonicalize |\ // RUN: mlir-opt -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if |\ // RUN: mlir-opt -lower-affine -convert-scf-to-cf -convert-vector-to-llvm \ -// RUN: -convert-arith-to-llvm -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -convert-arith-to-llvm -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir index 4eedd2a085aea..4a26080a654ad 100644 --- a/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir +++ b/mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir @@ -2,9 +2,9 @@ // everything on the same thread. 
// RUN: mlir-opt %s -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ @@ -15,9 +15,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write" \ // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ @@ -27,9 +27,9 @@ // RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" \ // RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \ // RUN: mlir-opt 
-convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm -reconcile-unrealized-casts |\ +// RUN: -gpu-kernel-outlining |\ +// RUN: mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))' |\ +// RUN: mlir-opt -gpu-to-llvm -reconcile-unrealized-casts |\ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir index 5c20cb3b3c961..00fc729ed158d 100644 --- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir +++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ -// RUN: --convert-scf-to-cf -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70}))' \ +// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir index 98ba4fa90f89e..cbbde4ac63345 100644 --- a/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir +++ b/mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir @@ -1,7 +1,7 @@ // RUN: 
mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70})' \ -// RUN: --convert-scf-to-cf -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin{chip=sm_70}))' \ +// RUN: | mlir-opt --convert-scf-to-cf -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir index 03acf3dc16e96..0f61f1ebcbd6b 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir index d39d23ad28b96..03948f6ee958e 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt 
-pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir index 473fe0725f7c0..5e1127ebcce59 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir index 58ad00fd3d0c5..92f6a804ece15 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir 
b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir index ef1a36da777c4..3b988e29cb39f 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir index 118ae800c08ba..485bdcd5d0a3e 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir index 4e667503693d0..eac5ecfc5b449 100644 --- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir +++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: 
-pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/async.mlir b/mlir/test/Integration/GPU/CUDA/async.mlir index e57801ee1b912..ffb11ab27c7f4 100644 --- a/mlir/test/Integration/GPU/CUDA/async.mlir +++ b/mlir/test/Integration/GPU/CUDA/async.mlir @@ -1,9 +1,9 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-async-region -gpu-to-llvm \ -// RUN: -async-to-async-runtime -async-runtime-ref-counting \ -// RUN: -convert-async-to-llvm -convert-func-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-async-region -gpu-to-llvm \ +// RUN: | mlir-opt -async-to-async-runtime -async-runtime-ref-counting \ +// RUN: | mlir-opt -convert-async-to-llvm -convert-func-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_async_runtime%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir index fc2540046e9f6..b52d688bb75f6 100644 --- a/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir +++ b/mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | 
mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir index 137465ff93909..c980f39c3dbb5 100644 --- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir +++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir index 94392cf89264f..be59529b27202 100644 --- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir +++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git 
a/mlir/test/Integration/GPU/CUDA/two-modules.mlir b/mlir/test/Integration/GPU/CUDA/two-modules.mlir index 3d25e7845126b..446be5709f36a 100644 --- a/mlir/test/Integration/GPU/CUDA/two-modules.mlir +++ b/mlir/test/Integration/GPU/CUDA/two-modules.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin)' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,gpu-to-cubin))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_cuda_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir index 2372752219bae..be27a0194fd43 100644 --- a/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir +++ b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip}))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir index b4b7eb66d818d..5aa51b53431cb 100644 --- a/mlir/test/Integration/GPU/ROCM/printf.mlir +++ b/mlir/test/Integration/GPU/ROCM/printf.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt %s \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 
runtime=HIP},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{index-bitwidth=32 runtime=HIP},gpu-to-hsaco{chip=%chip}))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/two-modules.mlir b/mlir/test/Integration/GPU/ROCM/two-modules.mlir index 3a3d1862adead..92ff32ed18b72 100644 --- a/mlir/test/Integration/GPU/ROCM/two-modules.mlir +++ b/mlir/test/Integration/GPU/ROCM/two-modules.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco{chip=%chip}))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir index d2857919c8966..ffb7de9030bff 100644 --- a/mlir/test/Integration/GPU/ROCM/vecadd.mlir +++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s \ -// RUN: -convert-scf-to-cf \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm=use-bare-pointers-for-kernels=true \ +// RUN: | mlir-opt -convert-scf-to-cf \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt 
-pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{use-bare-ptr-memref-call-conv=true},gpu-to-hsaco{chip=%chip}))' \ +// RUN: | mlir-opt -gpu-to-llvm=use-bare-pointers-for-kernels=true \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir index 4d2e55e0920f7..af3b1e9b8c3db 100644 --- a/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir +++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir @@ -1,8 +1,8 @@ // RUN: mlir-opt %s \ -// RUN: -convert-scf-to-cf \ -// RUN: -gpu-kernel-outlining \ -// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip})' \ -// RUN: -gpu-to-llvm \ +// RUN: | mlir-opt -convert-scf-to-cf \ +// RUN: | mlir-opt -gpu-kernel-outlining \ +// RUN: | mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-rocdl{chipset=%chip index-bitwidth=32},gpu-to-hsaco{chip=%chip}))' \ +// RUN: | mlir-opt -gpu-to-llvm \ // RUN: | mlir-cpu-runner \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_rocm_runtime%shlibext \ // RUN: --shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir index 9addbcc83517c..b2e3fd5eec3b1 100644 --- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir +++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir @@ -240,3 +240,29 @@ func.func @reduce(%arg0: memref<16x32x64xf32>, // CHECK: %[[OUT_ELEM:.*]] = memref.load %[[OUT]][%[[I]], %[[K]]] // CHECK: %[[ADD:.*]] = arith.addf %[[IN_ELEM]], %[[OUT_ELEM]] // CHECK: memref.store %[[ADD]], %[[OUT]][%[[I]], %[[K]]] + +// 
----- + +func.func @broadcast(%input: memref<8x32xf32>, + %init: memref<8x16x32xf32>) { + linalg.broadcast + ins(%input:memref<8x32xf32>) + outs(%init:memref<8x16x32xf32>) + dimensions = [0, 2] + func.return +} +// CHECK-LABEL: func.func @broadcast( +// CHECK-SAME: %[[IN:[a-zA-Z0-9]+]]: memref<8x32xf32>, +// CHECK-SAME: %[[OUT:[a-zA-Z0-9]+]]: memref<8x16x32xf32> + +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[C32:.*]] = arith.constant 32 : index + +// CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[C8]] step %[[C1]] { +// CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[C16]] step %[[C1]] { +// CHECK: scf.for %[[K:.*]] = %[[C0]] to %[[C32]] step %[[C1]] { +// CHECK: %[[ELEM:.*]] = memref.load %[[IN]][%[[I]], %[[K]]] +// CHECK: memref.store %[[ELEM]], %[[OUT]][%[[I]], %[[J]], %[[K]]] diff --git a/mlir/test/Pass/crash-recovery-dynamic-failure.mlir b/mlir/test/Pass/crash-recovery-dynamic-failure.mlir index 9901f7f2474e2..69e087d5aa83a 100644 --- a/mlir/test/Pass/crash-recovery-dynamic-failure.mlir +++ b/mlir/test/Pass/crash-recovery-dynamic-failure.mlir @@ -1,5 +1,5 @@ // Check that local reproducers will also traverse dynamic pass pipelines. 
-// RUN: mlir-opt %s -pass-pipeline='test-module-pass,test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=test-pass-failure}' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer --mlir-disable-threading +// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-module-pass,test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=test-pass-failure})' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer --mlir-disable-threading // RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL_DYNAMIC_FAILURE %s // The crash recovery mechanism will leak memory allocated in the crashing thread. @@ -15,4 +15,4 @@ module @inner_mod1 { // REPRO_LOCAL_DYNAMIC_FAILURE: module @inner_mod1 // REPRO_LOCAL_DYNAMIC_FAILURE: module @foo { -// REPRO_LOCAL_DYNAMIC_FAILURE: pipeline: "builtin.module(test-pass-failure)" +// REPRO_LOCAL_DYNAMIC_FAILURE: pipeline: "builtin.module(builtin.module(test-pass-failure))" diff --git a/mlir/test/Pass/crash-recovery.mlir b/mlir/test/Pass/crash-recovery.mlir index 91030a4bdfd52..e636064d26e34 100644 --- a/mlir/test/Pass/crash-recovery.mlir +++ b/mlir/test/Pass/crash-recovery.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-module-pass, test-pass-crash)' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass, test-pass-crash))' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics // RUN: cat %t | FileCheck -check-prefix=REPRO %s -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-module-pass, test-pass-crash)' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer -mlir-disable-threading +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass, test-pass-crash))' -mlir-pass-pipeline-crash-reproducer=%t 
-verify-diagnostics -mlir-pass-pipeline-local-reproducer -mlir-disable-threading // RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL %s // Check that we correctly handle verifiers passes with local reproducer, this used to crash. @@ -8,7 +8,7 @@ // RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL %s // Check that local reproducers will also traverse dynamic pass pipelines. -// RUN: mlir-opt %s -pass-pipeline='test-module-pass,test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=test-pass-crash}' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer --mlir-disable-threading +// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-module-pass,test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=test-pass-crash})' -mlir-pass-pipeline-crash-reproducer=%t -verify-diagnostics -mlir-pass-pipeline-local-reproducer --mlir-disable-threading // RUN: cat %t | FileCheck -check-prefix=REPRO_LOCAL_DYNAMIC %s // The crash recovery mechanism will leak memory allocated in the crashing thread. 
@@ -22,12 +22,12 @@ module @inner_mod1 { // REPRO: module @inner_mod1 // REPRO: module @foo { -// REPRO: pipeline: "builtin.module(test-module-pass,test-pass-crash)" +// REPRO: pipeline: "builtin.module(builtin.module(test-module-pass,test-pass-crash))" // REPRO_LOCAL: module @inner_mod1 // REPRO_LOCAL: module @foo { -// REPRO_LOCAL: pipeline: "builtin.module(test-pass-crash)" +// REPRO_LOCAL: pipeline: "builtin.module(builtin.module(test-pass-crash))" // REPRO_LOCAL_DYNAMIC: module @inner_mod1 // REPRO_LOCAL_DYNAMIC: module @foo { -// REPRO_LOCAL_DYNAMIC: pipeline: "builtin.module(test-pass-crash)" +// REPRO_LOCAL_DYNAMIC: pipeline: "builtin.module(builtin.module(test-pass-crash))" diff --git a/mlir/test/Pass/dynamic-pipeline-fail-on-parent.mlir b/mlir/test/Pass/dynamic-pipeline-fail-on-parent.mlir index 80219503da20b..c429ad3afb052 100644 --- a/mlir/test/Pass/dynamic-pipeline-fail-on-parent.mlir +++ b/mlir/test/Pass/dynamic-pipeline-fail-on-parent.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1 run-on-parent=1 dynamic-pipeline=test-patterns})' -split-input-file -verify-diagnostics +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1 run-on-parent=1 dynamic-pipeline=test-patterns}))' -split-input-file -verify-diagnostics // Verify that we fail to schedule a dynamic pipeline on the parent operation. 
diff --git a/mlir/test/Pass/dynamic-pipeline-nested.mlir b/mlir/test/Pass/dynamic-pipeline-nested.mlir index 35e909d74dd34..ac2fdd3265b63 100644 --- a/mlir/test/Pass/dynamic-pipeline-nested.mlir +++ b/mlir/test/Pass/dynamic-pipeline-nested.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1 dynamic-pipeline=cse})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=NOTNESTED --check-prefix=CHECK -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=cse})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=NESTED --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1 dynamic-pipeline=cse}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=NOTNESTED --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1 run-on-nested-operations=1 dynamic-pipeline=cse}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=NESTED --check-prefix=CHECK // Verify that we can schedule a dynamic pipeline on a nested operation diff --git a/mlir/test/Pass/dynamic-pipeline.mlir b/mlir/test/Pass/dynamic-pipeline.mlir index f037ae8044f82..5e31ba476aeb0 100644 --- a/mlir/test/Pass/dynamic-pipeline.mlir +++ b/mlir/test/Pass/dynamic-pipeline.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1, dynamic-pipeline=func.func(cse,canonicalize)})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD1-ONLY --check-prefix=CHECK -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod2, 
dynamic-pipeline=func.func(cse,canonicalize)})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD2 --check-prefix=MOD2-ONLY --check-prefix=CHECK -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{op-name=inner_mod1,inner_mod2, dynamic-pipeline=func.func(cse,canonicalize)})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK -// RUN: mlir-opt %s -pass-pipeline='builtin.module(test-dynamic-pipeline{dynamic-pipeline=func.func(cse,canonicalize)})' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1, dynamic-pipeline=func.func(cse,canonicalize)}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD1-ONLY --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod2, dynamic-pipeline=func.func(cse,canonicalize)}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD2 --check-prefix=MOD2-ONLY --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{op-name=inner_mod1,inner_mod2, dynamic-pipeline=func.func(cse,canonicalize)}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK +// RUN: mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-dynamic-pipeline{dynamic-pipeline=func.func(cse,canonicalize)}))' --mlir-disable-threading -mlir-print-ir-before-all 2>&1 | FileCheck %s --check-prefix=MOD1 --check-prefix=MOD2 --check-prefix=CHECK func.func @f() { diff --git a/mlir/test/Pass/generic-pipeline.mlir b/mlir/test/Pass/generic-pipeline.mlir index 
00c6c767c7707..dfd17d59dd270 100644 --- a/mlir/test/Pass/generic-pipeline.mlir +++ b/mlir/test/Pass/generic-pipeline.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='any(cse, test-interface-pass)' -allow-unregistered-dialect -o /dev/null +// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='builtin.module(any(cse, test-interface-pass))' -allow-unregistered-dialect -o /dev/null // Test that we execute generic pipelines correctly. The `cse` pass is fully generic and should execute // on both the module and the func. The `test-interface-pass` filters based on FunctionOpInterface and diff --git a/mlir/test/Pass/interface-pass.mlir b/mlir/test/Pass/interface-pass.mlir index e07237b5e8ff5..d02e8937f43aa 100644 --- a/mlir/test/Pass/interface-pass.mlir +++ b/mlir/test/Pass/interface-pass.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='func.func(test-interface-pass)' -o /dev/null +// RUN: mlir-opt %s -verify-diagnostics -pass-pipeline='builtin.module(func.func(test-interface-pass))' -o /dev/null // Test that we run the interface pass on the function. diff --git a/mlir/test/Pass/invalid-interface-pass.mlir b/mlir/test/Pass/invalid-interface-pass.mlir index eb36958b8b834..8f58f39200308 100644 --- a/mlir/test/Pass/invalid-interface-pass.mlir +++ b/mlir/test/Pass/invalid-interface-pass.mlir @@ -1,4 +1,4 @@ -// RUN: not mlir-opt %s -pass-pipeline='test-interface-pass' 2>&1 | FileCheck %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-interface-pass)' 2>&1 | FileCheck %s // Test that we emit an error when an interface pass is added to a pass manager it can't be scheduled on. 
diff --git a/mlir/test/Pass/invalid-ir-print-after-failure.mlir b/mlir/test/Pass/invalid-ir-print-after-failure.mlir index 012d999e23055..6910abe20be92 100644 --- a/mlir/test/Pass/invalid-ir-print-after-failure.mlir +++ b/mlir/test/Pass/invalid-ir-print-after-failure.mlir @@ -1,11 +1,11 @@ -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=true})' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-GENERIC -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=false})' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-GENERIC -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=false signal-pass-failure=true})' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-CUSTOM -// RUN: mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=false signal-pass-failure=false})' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-CUSTOM +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=true}))' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-GENERIC +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=false}))' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-GENERIC +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=false signal-pass-failure=true}))' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-CUSTOM +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=false signal-pass-failure=false}))' -mlir-print-ir-after-failure 2>&1 | FileCheck %s --check-prefix=CHECK-CUSTOM 
// Check that `-mlir-print-assume-verified` will print custom even when the IR is invalid. -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=true})' -mlir-print-ir-after-failure 2>&1 -mlir-print-assume-verified | FileCheck %s --check-prefix=CHECK-CUSTOM -// RUN: not mlir-opt %s -pass-pipeline='func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=false})' -mlir-print-ir-after-failure 2>&1 -mlir-print-assume-verified | FileCheck %s --check-prefix=CHECK-CUSTOM +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=true}))' -mlir-print-ir-after-failure 2>&1 -mlir-print-assume-verified | FileCheck %s --check-prefix=CHECK-CUSTOM +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-create-invalid-ir{emit-invalid-ir=true signal-pass-failure=false}))' -mlir-print-ir-after-failure 2>&1 -mlir-print-assume-verified | FileCheck %s --check-prefix=CHECK-CUSTOM // Test whether we print generically or not on pass failure, depending on whether there is invalid IR or not. diff --git a/mlir/test/Pass/invalid-parent.mlir b/mlir/test/Pass/invalid-parent.mlir index 1e2865dad1bf7..c6f54c43516fa 100644 --- a/mlir/test/Pass/invalid-parent.mlir +++ b/mlir/test/Pass/invalid-parent.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(test-pass-invalid-parent)' -verify-diagnostics +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-pass-invalid-parent))' -verify-diagnostics // Test that we properly report errors when the parent becomes invalid after running a pass // on a child operation. 
diff --git a/mlir/test/Pass/invalid-pass.mlir b/mlir/test/Pass/invalid-pass.mlir index 5a8b8386ce150..c9e37cc4984af 100644 --- a/mlir/test/Pass/invalid-pass.mlir +++ b/mlir/test/Pass/invalid-pass.mlir @@ -1,4 +1,4 @@ -// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass{test-option=a})' 2>&1 | FileCheck %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass{test-option=a}))' 2>&1 | FileCheck %s // CHECK: : no such option test-option // CHECK: failed to add `test-module-pass` with options `test-option=a` diff --git a/mlir/test/Pass/ir-printing.mlir b/mlir/test/Pass/ir-printing.mlir index bd506c2c7ebda..048b721ba6d53 100644 --- a/mlir/test/Pass/ir-printing.mlir +++ b/mlir/test/Pass/ir-printing.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-before=cse -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-before-all -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_ALL %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-after=cse -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-after-all -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,canonicalize)' -mlir-print-ir-before=cse -mlir-print-ir-module-scope -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_MODULE %s -// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,cse)' -mlir-print-ir-after-all -mlir-print-ir-after-change -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL_CHANGE %s -// RUN: not mlir-opt %s -mlir-disable-threading=true -pass-pipeline='func.func(cse,test-pass-failure)' 
-mlir-print-ir-after-failure -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_FAILURE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-before=cse -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-before-all -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_ALL %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-after=cse -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-after-all -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,canonicalize))' -mlir-print-ir-before=cse -mlir-print-ir-module-scope -o /dev/null 2>&1 | FileCheck -check-prefix=BEFORE_MODULE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,cse))' -mlir-print-ir-after-all -mlir-print-ir-after-change -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_ALL_CHANGE %s +// RUN: not mlir-opt %s -mlir-disable-threading=true -pass-pipeline='builtin.module(func.func(cse,test-pass-failure))' -mlir-print-ir-after-failure -o /dev/null 2>&1 | FileCheck -check-prefix=AFTER_FAILURE %s func.func @foo() { %0 = arith.constant 0 : i32 diff --git a/mlir/test/Pass/pass-timing.mlir b/mlir/test/Pass/pass-timing.mlir index 63cfa3702bdbc..bd5d611b47154 100644 --- a/mlir/test/Pass/pass-timing.mlir +++ b/mlir/test/Pass/pass-timing.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s -// RUN: mlir-opt %s 
-mlir-disable-threading=true -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s -// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s -// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='func.func(cse,canonicalize,cse)' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s // RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=false -test-pm-nested-pipeline -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=NESTED_PIPELINE %s // LIST: Execution time report diff --git a/mlir/test/Pass/pipeline-options-parsing.mlir b/mlir/test/Pass/pipeline-options-parsing.mlir index 436dfce4d1d96..33bef75ee94a2 100644 --- a/mlir/test/Pass/pipeline-options-parsing.mlir +++ b/mlir/test/Pass/pipeline-options-parsing.mlir @@ -1,11 +1,11 @@ -// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass{)' 
2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s -// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass{test-option=3})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s -// RUN: not mlir-opt %s -pass-pipeline='builtin.module(func.func(test-options-pass{list=3}), test-module-pass{invalid-option=3})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s -// RUN: not mlir-opt %s -pass-pipeline='test-options-pass{list=3 list=notaninteger}' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s -// RUN: mlir-opt %s -pass-pipeline='func.func(test-options-pass{list=1,2,3,4 list=5 string=value1 string=value2})' -// RUN: mlir-opt %s -verify-each=false -pass-pipeline='func.func(test-options-pass{string-list=a list=1,2,3,4 string-list=b,c list=5 string-list=d string=nested_pipeline{arg1=10 arg2=" {} " arg3=true}})' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_1 %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass{))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.module(test-module-pass{test-option=3}))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.module(func.func(test-options-pass{list=3}), test-module-pass{invalid-option=3}))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s +// RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-options-pass{list=3 list=notaninteger})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-options-pass{list=1,2,3,4 list=5 string=value1 string=value2}))' +// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(func.func(test-options-pass{string-list=a list=1,2,3,4 string-list=b,c list=5 string-list=d string=nested_pipeline{arg1=10 arg2=" {} " arg3=true}}))' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_1 %s // RUN: mlir-opt %s -verify-each=false 
-test-options-pass-pipeline='list=1 string-list=a,b' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_2 %s -// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(func.func(test-options-pass{list=3}), func.func(test-options-pass{list=1,2,3,4}))' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_3 %s +// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(builtin.module(func.func(test-options-pass{list=3}), func.func(test-options-pass{list=1,2,3,4})))' -dump-pass-pipeline 2>&1 | FileCheck --check-prefix=CHECK_3 %s // CHECK_ERROR_1: missing closing '}' while processing pass options // CHECK_ERROR_2: no such option test-option diff --git a/mlir/test/Pass/pipeline-parsing.mlir b/mlir/test/Pass/pipeline-parsing.mlir index 77519e1ed1897..6291dd647391b 100644 --- a/mlir/test/Pass/pipeline-parsing.mlir +++ b/mlir/test/Pass/pipeline-parsing.mlir @@ -1,10 +1,10 @@ -// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass)' -pass-pipeline="func.func(cse,canonicalize)" -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s +// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(builtin.module(test-module-pass,func.func(test-function-pass)),func.func(test-function-pass),func.func(cse,canonicalize))' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s // RUN: mlir-opt %s -mlir-disable-threading -test-textual-pm-nested-pipeline -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=TEXTUAL_CHECK -// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(test-module-pass),any(test-interface-pass),any(test-interface-pass),func.func(test-function-pass),any(canonicalize),func.func(cse)' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=GENERIC_MERGE_CHECK -// RUN: not mlir-opt %s 
-pass-pipeline='builtin.module(test-module-pass' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s +// RUN: mlir-opt %s -mlir-disable-threading -pass-pipeline='builtin.module(builtin.module(test-module-pass),any(test-interface-pass),any(test-interface-pass),func.func(test-function-pass),any(canonicalize),func.func(cse))' -verify-each=false -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck %s --check-prefix=GENERIC_MERGE_CHECK +// RUN: not mlir-opt %s -pass-pipeline='any(builtin.module(test-module-pass)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_1 %s // RUN: not mlir-opt %s -pass-pipeline='builtin.module(test-module-pass))' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_2 %s -// RUN: not mlir-opt %s -pass-pipeline='builtin.module()(' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s -// RUN: not mlir-opt %s -pass-pipeline=',' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s +// RUN: not mlir-opt %s -pass-pipeline='any(builtin.module()()' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s +// RUN: not mlir-opt %s -pass-pipeline='any(,)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s // RUN: not mlir-opt %s -pass-pipeline='func.func(test-module-pass)' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_5 %s // CHECK_ERROR_1: encountered unbalanced parentheses while parsing pipeline @@ -13,6 +13,15 @@ // CHECK_ERROR_4: does not refer to a registered pass or pass pipeline // CHECK_ERROR_5: Can't add pass '{{.*}}TestModulePass' restricted to 'builtin.module' on a PassManager intended to run on 'func.func', did you intend to nest? 
+// RUN: not mlir-opt %s -pass-pipeline='' -cse 2>&1 | FileCheck --check-prefix=CHECK_ERROR_6 %s +// CHECK_ERROR_6: '-pass-pipeline' option can't be used with individual pass options + +// RUN: not mlir-opt %s -pass-pipeline='wrong-op()' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_7 %s +// CHECK_ERROR_7: can't run 'wrong-op' pass manager on 'builtin.module' op + +// RUN: mlir-opt %s -pass-pipeline='any(cse)' -dump-pass-pipeline 2>&1 | FileCheck %s -check-prefix=CHECK_ROUNDTRIP +// CHECK_ROUNDTRIP: any(cse) + func.func @foo() { return } diff --git a/mlir/test/Pass/pipeline-stats.mlir b/mlir/test/Pass/pipeline-stats.mlir index e71cb69fa99af..0f8b02c1057ea 100644 --- a/mlir/test/Pass/pipeline-stats.mlir +++ b/mlir/test/Pass/pipeline-stats.mlir @@ -1,6 +1,6 @@ // REQUIRES: asserts -// RUN: mlir-opt %s -verify-each=true -pass-pipeline='func.func(test-stats-pass,test-stats-pass)' -mlir-pass-statistics -mlir-pass-statistics-display=list 2>&1 | FileCheck -check-prefix=LIST %s -// RUN: mlir-opt %s -verify-each=true -pass-pipeline='func.func(test-stats-pass,test-stats-pass)' -mlir-pass-statistics -mlir-pass-statistics-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s +// RUN: mlir-opt %s -verify-each=true -pass-pipeline='builtin.module(func.func(test-stats-pass,test-stats-pass))' -mlir-pass-statistics -mlir-pass-statistics-display=list 2>&1 | FileCheck -check-prefix=LIST %s +// RUN: mlir-opt %s -verify-each=true -pass-pipeline='builtin.module(func.func(test-stats-pass,test-stats-pass))' -mlir-pass-statistics -mlir-pass-statistics-display=pipeline 2>&1 | FileCheck -check-prefix=PIPELINE %s // LIST: Pass statistics report // LIST: TestStatisticPass diff --git a/mlir/test/Pass/run-reproducer.mlir b/mlir/test/Pass/run-reproducer.mlir index 6627033d8be7d..496471d032a52 100644 --- a/mlir/test/Pass/run-reproducer.mlir +++ b/mlir/test/Pass/run-reproducer.mlir @@ -1,3 +1,4 @@ +// RUN: mlir-opt %s -dump-pass-pipeline 2>&1 | FileCheck %s // RUN: mlir-opt %s 
-mlir-print-ir-before=cse 2>&1 | FileCheck -check-prefix=BEFORE %s func.func @foo() { @@ -12,7 +13,9 @@ func.func @bar() { {-# external_resources: { mlir_reproducer: { - pipeline: "func.func(cse,canonicalize)", + verify_each: true, + // CHECK: builtin.module(func.func(cse,canonicalize{ max-iterations=1 region-simplify=false top-down=false})) + pipeline: "builtin.module(func.func(cse,canonicalize{max-iterations=1 region-simplify=false top-down=false}))", disable_threading: true } } diff --git a/mlir/test/Target/LLVMIR/Import/basic.ll b/mlir/test/Target/LLVMIR/Import/basic.ll index 784491499fe2d..05d10eeaec9d8 100644 --- a/mlir/test/Target/LLVMIR/Import/basic.ll +++ b/mlir/test/Target/LLVMIR/Import/basic.ll @@ -24,8 +24,8 @@ @g4 = external global i32, align 8 ; CHECK: llvm.mlir.global internal constant @int_gep() {addr_space = 0 : i32, dso_local} : !llvm.ptr { -; CHECK-DAG: %[[addr:[0-9]+]] = llvm.mlir.addressof @g4 : !llvm.ptr -; CHECK-DAG: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 +; CHECK: %[[addr:[0-9]+]] = llvm.mlir.addressof @g4 : !llvm.ptr +; CHECK: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 ; CHECK-NEXT: %[[gepinit:[0-9]+]] = llvm.getelementptr %[[addr]][%[[c2]]] : (!llvm.ptr, i32) -> !llvm.ptr ; CHECK-NEXT: llvm.return %[[gepinit]] : !llvm.ptr ; CHECK-NEXT: } @@ -133,10 +133,10 @@ define internal spir_func void @spir_func_internal() { ; FIXME: function attributes. 
; CHECK-LABEL: llvm.func internal @f1(%arg0: i64) -> i32 attributes {dso_local} { ; CHECK-DBG: llvm.func internal @f1(%arg0: i64 loc(unknown)) -> i32 attributes {dso_local} { -; CHECK-DAG: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 -; CHECK-DAG: %[[c42:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 -; CHECK-DAG: %[[c1:[0-9]+]] = llvm.mlir.constant(true) : i1 -; CHECK-DAG: %[[c43:[0-9]+]] = llvm.mlir.constant(43 : i32) : i32 +; CHECK: %[[c2:[0-9]+]] = llvm.mlir.constant(2 : i32) : i32 +; CHECK: %[[c1:[0-9]+]] = llvm.mlir.constant(true) : i1 +; CHECK: %[[c43:[0-9]+]] = llvm.mlir.constant(43 : i32) : i32 +; CHECK: %[[c42:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 define internal dso_local i32 @f1(i64 %a) norecurse { entry: ; CHECK: %{{[0-9]+}} = llvm.inttoptr %arg0 : i64 to !llvm.ptr @@ -148,7 +148,7 @@ entry: ; %{{[0-9]+}} = llvm.ptrtoint %[[addrof2]] : !llvm.ptr to i64 ; %{{[0-9]+}} = llvm.getelementptr %[[addrof]][%3] : (!llvm.ptr, i32) -> !llvm.ptr %bb = ptrtoint double* @g2 to i64 - %cc = getelementptr double, double* @g2, i32 2 + %cc = getelementptr double, double* @g2, i32 3 ; CHECK: %[[b:[0-9]+]] = llvm.trunc %arg0 : i64 to i32 ; CHECK-DBG: llvm.trunc %arg0 : i64 to i32 loc(#[[UNKNOWNLOC]]) %b = trunc i64 %a to i32 @@ -195,18 +195,18 @@ define void @f6(void (i16) *%fn) { ; Testing rest of the floating point constant kinds. 
; CHECK-LABEL: llvm.func @FPConstant(%arg0: f16, %arg1: bf16, %arg2: f128, %arg3: f80) define void @FPConstant(half %a, bfloat %b, fp128 %c, x86_fp80 %d) { - ; CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(7.000000e+00 : f80) : f80 - ; CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(0.000000e+00 : f128) : f128 - ; CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(1.000000e+00 : bf16) : bf16 - ; CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(1.000000e+00 : f16) : f16 + ; CHECK: %[[C0:.+]] = llvm.mlir.constant(1.000000e+00 : f16) : f16 + ; CHECK: %[[C1:.+]] = llvm.mlir.constant(1.000000e+00 : bf16) : bf16 + ; CHECK: %[[C2:.+]] = llvm.mlir.constant(0.000000e+00 : f128) : f128 + ; CHECK: %[[C3:.+]] = llvm.mlir.constant(7.000000e+00 : f80) : f80 - ; CHECK: llvm.fadd %[[C3]], %arg0 : f16 + ; CHECK: llvm.fadd %[[C0]], %arg0 : f16 %1 = fadd half 1.0, %a - ; CHECK: llvm.fadd %[[C2]], %arg1 : bf16 + ; CHECK: llvm.fadd %[[C1]], %arg1 : bf16 %2 = fadd bfloat 1.0, %b - ; CHECK: llvm.fadd %[[C1]], %arg2 : f128 + ; CHECK: llvm.fadd %[[C2]], %arg2 : f128 %3 = fadd fp128 0xL00000000000000000000000000000000, %c - ; CHECK: llvm.fadd %[[C0]], %arg3 : f80 + ; CHECK: llvm.fadd %[[C3]], %arg3 : f80 %4 = fadd x86_fp80 0xK4001E000000000000000, %d ret void } diff --git a/mlir/test/Target/LLVMIR/Import/constant-aggregate.ll b/mlir/test/Target/LLVMIR/Import/constant-aggregate.ll index 5e22aadcaff0e..955cf47779664 100644 --- a/mlir/test/Target/LLVMIR/Import/constant-aggregate.ll +++ b/mlir/test/Target/LLVMIR/Import/constant-aggregate.ll @@ -1,41 +1,40 @@ ; RUN: mlir-translate --import-llvm %s | FileCheck %s - -; CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(7 : i32) : i32 -; CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(8 : i16) : i16 -; CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(4 : i8) : i8 -; CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(9 : i32) : i32 ; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.struct<"SimpleAggType", (i32, i8, i16, i32)> -; CHECK: %[[CHAIN0:.+]] = llvm.insertvalue %[[C3]], %[[ROOT]][0] -; CHECK: 
%[[CHAIN1:.+]] = llvm.insertvalue %[[C2]], %[[CHAIN0]][1] -; CHECK: %[[CHAIN2:.+]] = llvm.insertvalue %[[C1]], %[[CHAIN1]][2] -; CHECK: %[[CHAIN3:.+]] = llvm.insertvalue %[[C0]], %[[CHAIN2]][3] +; CHECK: %[[C0:.+]] = llvm.mlir.constant(9 : i32) : i32 +; CHECK: %[[CHAIN0:.+]] = llvm.insertvalue %[[C0]], %[[ROOT]][0] +; CHECK: %[[C1:.+]] = llvm.mlir.constant(4 : i8) : i8 +; CHECK: %[[CHAIN1:.+]] = llvm.insertvalue %[[C1]], %[[CHAIN0]][1] +; CHECK: %[[C2:.+]] = llvm.mlir.constant(8 : i16) : i16 +; CHECK: %[[CHAIN2:.+]] = llvm.insertvalue %[[C2]], %[[CHAIN1]][2] +; CHECK: %[[C3:.+]] = llvm.mlir.constant(7 : i32) : i32 +; CHECK: %[[CHAIN3:.+]] = llvm.insertvalue %[[C3]], %[[CHAIN2]][3] ; CHECK: llvm.return %[[CHAIN3]] %SimpleAggType = type {i32, i8, i16, i32} @simpleAgg = global %SimpleAggType {i32 9, i8 4, i16 8, i32 7} -; CHECK: %[[NP:.+]] = llvm.mlir.null : !llvm.ptr> -; CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(4 : i32) : i32 -; CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(3 : i16) : i16 -; CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(2 : i8) : i8 -; CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(1 : i32) : i32 -; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.struct<"SimpleAggType", (i32, i8, i16, i32)> -; CHECK: %[[CHAIN0:.+]] = llvm.insertvalue %[[C3]], %[[ROOT]][0] +; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.struct<"NestedAggType", (struct<"SimpleAggType", (i32, i8, i16, i32)>, ptr>)> +; CHECK: %[[NESTED:.+]] = llvm.mlir.undef : !llvm.struct<"SimpleAggType", (i32, i8, i16, i32)> +; CHECK: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32 +; CHECK: %[[CHAIN0:.+]] = llvm.insertvalue %[[C1]], %[[NESTED]][0] +; CHECK: %[[C2:.+]] = llvm.mlir.constant(2 : i8) : i8 ; CHECK: %[[CHAIN1:.+]] = llvm.insertvalue %[[C2]], %[[CHAIN0]][1] -; CHECK: %[[CHAIN2:.+]] = llvm.insertvalue %[[C1]], %[[CHAIN1]][2] -; CHECK: %[[CHAIN3:.+]] = llvm.insertvalue %[[C0]], %[[CHAIN2]][3] -; CHECK: %[[ROOT2:.+]] = llvm.mlir.undef : !llvm.struct<"NestedAggType", (struct<"SimpleAggType", (i32, i8, 
i16, i32)>, ptr>)> -; CHECK: %[[CHAIN4:.+]] = llvm.insertvalue %[[CHAIN3]], %[[ROOT2]][0] +; CHECK: %[[C3:.+]] = llvm.mlir.constant(3 : i16) : i16 +; CHECK: %[[CHAIN2:.+]] = llvm.insertvalue %[[C3]], %[[CHAIN1]][2] +; CHECK: %[[C4:.+]] = llvm.mlir.constant(4 : i32) : i32 +; CHECK: %[[CHAIN3:.+]] = llvm.insertvalue %[[C4]], %[[CHAIN2]][3] +; CHECK: %[[CHAIN4:.+]] = llvm.insertvalue %[[CHAIN3]], %[[ROOT]][0] +; CHECK: %[[NP:.+]] = llvm.mlir.null : !llvm.ptr> ; CHECK: %[[CHAIN5:.+]] = llvm.insertvalue %[[NP]], %[[CHAIN4]][1] ; CHECK: llvm.return %[[CHAIN5]] %NestedAggType = type {%SimpleAggType, %SimpleAggType*} @nestedAgg = global %NestedAggType { %SimpleAggType{i32 1, i8 2, i16 3, i32 4}, %SimpleAggType* null } -; CHECK: %[[C0:.+]] = llvm.mlir.null : !llvm.ptr> -; CHECK: %[[C1:.+]] = llvm.mlir.null : !llvm.ptr> ; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.vec<2 x ptr>> +; CHECK: %[[C0:.+]] = llvm.mlir.null : !llvm.ptr> ; CHECK: %[[P0:.+]] = llvm.mlir.constant(0 : i32) : i32 -; CHECK: %[[CHAIN0:.+]] = llvm.insertelement %[[C1]], %[[ROOT]][%[[P0]] : i32] : !llvm.vec<2 x ptr>> +; CHECK: %[[CHAIN0:.+]] = llvm.insertelement %[[C0]], %[[ROOT]][%[[P0]] : i32] : !llvm.vec<2 x ptr>> +; CHECK: %[[C1:.+]] = llvm.mlir.null : !llvm.ptr> ; CHECK: %[[P1:.+]] = llvm.mlir.constant(1 : i32) : i32 -; CHECK: %[[CHAIN1:.+]] = llvm.insertelement %[[C0]], %[[CHAIN0]][%[[P1]] : i32] : !llvm.vec<2 x ptr>> +; CHECK: %[[CHAIN1:.+]] = llvm.insertelement %[[C1]], %[[CHAIN0]][%[[P1]] : i32] : !llvm.vec<2 x ptr>> ; CHECK: llvm.return %[[CHAIN1]] : !llvm.vec<2 x ptr>> @vectorAgg = global <2 x %SimpleAggType*> <%SimpleAggType* null, %SimpleAggType* null> diff --git a/mlir/test/Target/LLVMIR/Import/incorrect-constant-caching.ll b/mlir/test/Target/LLVMIR/Import/incorrect-constant-caching.ll index a4add0ea95414..afb8cf4e4c1a9 100644 --- a/mlir/test/Target/LLVMIR/Import/incorrect-constant-caching.ll +++ b/mlir/test/Target/LLVMIR/Import/incorrect-constant-caching.ll @@ -8,23 +8,22 @@ ; only wrote 
minimum level of checks. %my_struct = type {i32, i8*} +; CHECK: llvm.mlir.addressof @str0 : !llvm.ptr> ; CHECK: llvm.mlir.addressof @str1 : !llvm.ptr> -; CHECK: llvm.getelementptr -; CHECK: llvm.mlir.constant(7 : i32) : i32 +; CHECK: llvm.mlir.undef : !llvm.array<2 x struct<"my_struct", (i32, ptr)>> ; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> +; CHECK: llvm.mlir.constant(8 : i32) : i32 ; CHECK: llvm.insertvalue -; CHECK: llvm.insertvalue -; CHECK: llvm.mlir.addressof @str0 : !llvm.ptr> ; CHECK: llvm.getelementptr -; CHECK: llvm.mlir.constant(8 : i32) : i32 -; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> ; CHECK: llvm.insertvalue ; CHECK: llvm.insertvalue -; CHECK: llvm.mlir.undef : !llvm.array<2 x struct<"my_struct", (i32, ptr)>> +; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> +; CHECK: llvm.mlir.constant(7 : i32) : i32 +; CHECK: llvm.insertvalue +; CHECK: llvm.getelementptr ; CHECK: llvm.insertvalue ; CHECK: llvm.insertvalue ; CHECK: llvm.return @str0 = private unnamed_addr constant [5 x i8] c"aaaa\00" @str1 = private unnamed_addr constant [5 x i8] c"bbbb\00" @g = global [2 x %my_struct] [%my_struct {i32 8, i8* getelementptr ([5 x i8], [5 x i8]* @str0, i32 0, i32 0)}, %my_struct {i32 7, i8* getelementptr ([5 x i8], [5 x i8]* @str1, i32 0, i32 0)}] - diff --git a/mlir/test/Target/LLVMIR/Import/incorrect-constexpr-inst-caching.ll b/mlir/test/Target/LLVMIR/Import/incorrect-constexpr-inst-caching.ll index edc8379067451..916b961c568e7 100644 --- a/mlir/test/Target/LLVMIR/Import/incorrect-constexpr-inst-caching.ll +++ b/mlir/test/Target/LLVMIR/Import/incorrect-constexpr-inst-caching.ll @@ -5,27 +5,26 @@ ; Thus, we only wrote minimum level of checks. 
%my_struct = type {i32, i8*} -; CHECK: llvm.mlir.constant(3 : i32) : i32 -; CHECK: llvm.mlir.constant(2 : i32) : i32 +; CHECK: llvm.mlir.addressof @str0 : !llvm.ptr> +; CHECK: llvm.mlir.constant(0 : i32) : i32 +; CHECK: llvm.mlir.constant(1 : i32) : i32 ; CHECK: llvm.mlir.addressof @str1 : !llvm.ptr> -; CHECK: llvm.getelementptr -; CHECK: llvm.mlir.constant(7 : i32) : i32 +; CHECK: llvm.mlir.constant(2 : i32) : i32 +; CHECK: llvm.mlir.constant(3 : i32) : i32 +; CHECK: llvm.mlir.undef : !llvm.array<2 x struct<"my_struct", (i32, ptr)>> ; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> +; CHECK: llvm.mlir.constant(8 : i32) : i32 ; CHECK: llvm.insertvalue -; CHECK: llvm.insertvalue -; CHECK: llvm.mlir.constant(1 : i32) : i32 -; CHECK: llvm.mlir.constant(0 : i32) : i32 -; CHECK: llvm.mlir.addressof @str0 : !llvm.ptr> ; CHECK: llvm.getelementptr -; CHECK: llvm.mlir.constant(8 : i32) : i32 -; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> ; CHECK: llvm.insertvalue ; CHECK: llvm.insertvalue -; CHECK: llvm.mlir.undef : !llvm.array<2 x struct<"my_struct", (i32, ptr)>> +; CHECK: llvm.mlir.undef : !llvm.struct<"my_struct", (i32, ptr)> +; CHECK: llvm.mlir.constant(7 : i32) : i32 +; CHECK: llvm.insertvalue +; CHECK: llvm.getelementptr ; CHECK: llvm.insertvalue ; CHECK: llvm.insertvalue ; CHECK: llvm.return @str0 = private unnamed_addr constant [5 x i8] c"aaaa\00" @str1 = private unnamed_addr constant [5 x i8] c"bbbb\00" @g = global [2 x %my_struct] [%my_struct {i32 8, i8* getelementptr ([5 x i8], [5 x i8]* @str0, i32 0, i32 1)}, %my_struct {i32 7, i8* getelementptr ([5 x i8], [5 x i8]* @str1, i32 2, i32 3)}] - diff --git a/mlir/test/Target/LLVMIR/Import/instructions.ll b/mlir/test/Target/LLVMIR/Import/instructions.ll index fa3c5efa4f40d..c5322ab6f84f3 100644 --- a/mlir/test/Target/LLVMIR/Import/instructions.ll +++ b/mlir/test/Target/LLVMIR/Import/instructions.ll @@ -6,8 +6,8 @@ ; CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] ; CHECK-SAME: 
%[[ARG4:[a-zA-Z0-9]+]] define void @integer_arith(i32 %arg1, i32 %arg2, i64 %arg3, i64 %arg4) { - ; CHECK-DAG: %[[C1:[0-9]+]] = llvm.mlir.constant(-7 : i32) : i32 - ; CHECK-DAG: %[[C2:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 + ; CHECK: %[[C1:[0-9]+]] = llvm.mlir.constant(-7 : i32) : i32 + ; CHECK: %[[C2:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 ; CHECK: llvm.add %[[ARG1]], %[[C1]] : i32 %1 = add i32 %arg1, -7 ; CHECK: llvm.add %[[C2]], %[[ARG2]] : i32 @@ -75,13 +75,13 @@ define i1 @integer_compare(i32 %arg1, i32 %arg2, <4 x i64> %arg3, <4 x i64> %arg ; CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] ; CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]] define void @fp_arith(float %arg1, float %arg2, double %arg3, double %arg4) { - ; CHECK: %[[C1:[0-9]+]] = llvm.mlir.constant(3.030000e+01 : f64) : f64 - ; CHECK: %[[C2:[0-9]+]] = llvm.mlir.constant(3.030000e+01 : f32) : f32 - ; CHECK: llvm.fadd %[[C2]], %[[ARG1]] : f32 + ; CHECK: %[[C1:[0-9]+]] = llvm.mlir.constant(3.030000e+01 : f32) : f32 + ; CHECK: %[[C2:[0-9]+]] = llvm.mlir.constant(3.030000e+01 : f64) : f64 + ; CHECK: llvm.fadd %[[C1]], %[[ARG1]] : f32 %1 = fadd float 0x403E4CCCC0000000, %arg1 ; CHECK: llvm.fadd %[[ARG1]], %[[ARG2]] : f32 %2 = fadd float %arg1, %arg2 - ; CHECK: llvm.fadd %[[C1]], %[[ARG3]] : f64 + ; CHECK: llvm.fadd %[[C2]], %[[ARG3]] : f64 %3 = fadd double 3.030000e+01, %arg3 ; CHECK: llvm.fsub %[[ARG1]], %[[ARG2]] : f32 %4 = fsub float %arg1, %arg2 @@ -212,8 +212,8 @@ define ptr addrspace(2) @addrspace_casts(ptr addrspace(1) %arg1) { ; CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]] ; CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]] define void @integer_arith(i32 %arg1, i32 %arg2, i64 %arg3, i64 %arg4) { - ; CHECK-DAG: %[[C1:[0-9]+]] = llvm.mlir.constant(-7 : i32) : i32 - ; CHECK-DAG: %[[C2:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 + ; CHECK: %[[C1:[0-9]+]] = llvm.mlir.constant(-7 : i32) : i32 + ; CHECK: %[[C2:[0-9]+]] = llvm.mlir.constant(42 : i32) : i32 ; CHECK: llvm.add %[[ARG1]], %[[C1]] : i32 ; CHECK: llvm.add %[[C2]], %[[ARG2]] : 
i32 ; CHECK: llvm.sub %[[ARG3]], %[[ARG4]] : i64 diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index d8fca91860053..550203b543c59 100644 --- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -1,18 +1,14 @@ ; RUN: mlir-translate -import-llvm %s | FileCheck %s -define void @intrinsics() { - ret void -} - ; CHECK-LABEL: llvm.func @fmuladd_test define void @fmuladd_test(float %0, float %1, <8 x float> %2, i8* %3) { - ; CHECK: "llvm.intr.fmuladd"(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 + ; CHECK: llvm.intr.fmuladd(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 %5 = call float @llvm.fmuladd.f32(float %0, float %1, float %0) - ; CHECK: "llvm.intr.fmuladd"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.fmuladd(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %2, <8 x float> %2, <8 x float> %2) - ; CHECK: "llvm.intr.fma"(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 + ; CHECK: llvm.intr.fma(%{{.*}}, %{{.*}}, %{{.*}}) : (f32, f32, f32) -> f32 %7 = call float @llvm.fma.f32(float %0, float %1, float %0) - ; CHECK: "llvm.intr.fma"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.fma(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32> %8 = call <8 x float> @llvm.fma.v8f32(<8 x float> %2, <8 x float> %2, <8 x float> %2) ; CHECK: "llvm.intr.prefetch"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i32, i32, i32) -> () call void @llvm.prefetch.p0i8(i8* %3, i32 0, i32 3, i32 1) @@ -21,212 +17,212 @@ define void @fmuladd_test(float %0, float %1, <8 x float> %2, i8* %3) { ; CHECK-LABEL: llvm.func @exp_test define void @exp_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.exp"(%{{.*}}) : 
(f32) -> f32 + ; CHECK: llvm.intr.exp(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.exp.f32(float %0) - ; CHECK: "llvm.intr.exp"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.exp(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.exp.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @exp2_test define void @exp2_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.exp2"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.exp2(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.exp2.f32(float %0) - ; CHECK: "llvm.intr.exp2"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.exp2(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.exp2.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @log_test define void @log_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.log"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.log(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.log.f32(float %0) - ; CHECK: "llvm.intr.log"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.log(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.log.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @log10_test define void @log10_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.log10"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.log10(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.log10.f32(float %0) - ; CHECK: "llvm.intr.log10"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.log10(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.log10.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @log2_test define void @log2_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.log2"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.log2(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.log2.f32(float %0) - ; CHECK: "llvm.intr.log2"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.log2(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = 
call <8 x float> @llvm.log2.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @fabs_test define void @fabs_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.fabs"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.fabs(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.fabs.f32(float %0) - ; CHECK: "llvm.intr.fabs"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.fabs(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.fabs.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @sqrt_test define void @sqrt_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.sqrt"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.sqrt(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.sqrt.f32(float %0) - ; CHECK: "llvm.intr.sqrt"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.sqrt(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @ceil_test define void @ceil_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.ceil"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.ceil(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.ceil.f32(float %0) - ; CHECK: "llvm.intr.ceil"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.ceil(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @floor_test define void @floor_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.floor"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.floor(%{{.*}}) : (f32) -> f32 %3 = call float @llvm.floor.f32(float %0) - ; CHECK: "llvm.intr.floor"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.floor(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @cos_test define void @cos_test(float %0, <8 x float> %1) { - ; CHECK: "llvm.intr.cos"(%{{.*}}) : (f32) -> f32 + ; CHECK: llvm.intr.cos(%{{.*}}) : (f32) 
-> f32 %3 = call float @llvm.cos.f32(float %0) - ; CHECK: "llvm.intr.cos"(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.cos(%{{.*}}) : (vector<8xf32>) -> vector<8xf32> %4 = call <8 x float> @llvm.cos.v8f32(<8 x float> %1) ret void } ; CHECK-LABEL: llvm.func @copysign_test define void @copysign_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.copysign"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.copysign(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.copysign.f32(float %0, float %1) - ; CHECK: "llvm.intr.copysign"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.copysign(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.copysign.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @pow_test define void @pow_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.pow"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.pow(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.pow.f32(float %0, float %1) - ; CHECK: "llvm.intr.pow"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.pow(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.pow.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @bitreverse_test define void @bitreverse_test(i32 %0, <8 x i32> %1) { - ; CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (i32) -> i32 + ; CHECK: llvm.intr.bitreverse(%{{.*}}) : (i32) -> i32 %3 = call i32 @llvm.bitreverse.i32(i32 %0) - ; CHECK: "llvm.intr.bitreverse"(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.bitreverse(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> %4 = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %1) ret void } ; CHECK-LABEL: llvm.func @ctlz_test define void @ctlz_test(i32 %0, <8 x i32> %1) { - ; CHECK-DAG: %[[falseval1:.+]] = 
llvm.mlir.constant(false) : i1 - ; CHECK-DAG: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval2]]) : (i32, i1) -> i32 + ; CHECK: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval1]]) : (i32, i1) -> i32 %3 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) - ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval1]]) : (vector<8xi32>, i1) -> vector<8xi32> + ; CHECK: "llvm.intr.ctlz"(%{{.*}}, %[[falseval2]]) : (vector<8xi32>, i1) -> vector<8xi32> %4 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %1, i1 false) ret void } ; CHECK-LABEL: llvm.func @cttz_test define void @cttz_test(i32 %0, <8 x i32> %1) { - ; CHECK-DAG: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK-DAG: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval2]]) : (i32, i1) -> i32 + ; CHECK: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 + ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval1]]) : (i32, i1) -> i32 %3 = call i32 @llvm.cttz.i32(i32 %0, i1 false) - ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval1]]) : (vector<8xi32>, i1) -> vector<8xi32> + ; CHECK: "llvm.intr.cttz"(%{{.*}}, %[[falseval2]]) : (vector<8xi32>, i1) -> vector<8xi32> %4 = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %1, i1 false) ret void } ; CHECK-LABEL: llvm.func @ctpop_test define void @ctpop_test(i32 %0, <8 x i32> %1) { - ; CHECK: "llvm.intr.ctpop"(%{{.*}}) : (i32) -> i32 + ; CHECK: llvm.intr.ctpop(%{{.*}}) : (i32) -> i32 %3 = call i32 @llvm.ctpop.i32(i32 %0) - ; CHECK: "llvm.intr.ctpop"(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.ctpop(%{{.*}}) : (vector<8xi32>) -> vector<8xi32> %4 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %1) ret void } ; CHECK-LABEL: llvm.func @maximum_test define void @maximum_test(float %0, float %1, <8 x float> %2, <8 x 
float> %3) { - ; CHECK: "llvm.intr.maximum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.maximum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.maximum.f32(float %0, float %1) - ; CHECK: "llvm.intr.maximum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.maximum(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.maximum.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @minimum_test define void @minimum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.minimum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.minimum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.minimum.f32(float %0, float %1) - ; CHECK: "llvm.intr.minimum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.minimum(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.minimum.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @maxnum_test define void @maxnum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.maxnum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.maxnum.f32(float %0, float %1) - ; CHECK: "llvm.intr.maxnum"(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.maxnum(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @minnum_test define void @minnum_test(float %0, float %1, <8 x float> %2, <8 x float> %3) { - ; CHECK: "llvm.intr.minnum"(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + ; CHECK: llvm.intr.minnum(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 %5 = call float @llvm.minnum.f32(float %0, float %1) - ; CHECK: "llvm.intr.minnum"(%{{.*}}, 
%{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> + ; CHECK: llvm.intr.minnum(%{{.*}}, %{{.*}}) : (vector<8xf32>, vector<8xf32>) -> vector<8xf32> %6 = call <8 x float> @llvm.minnum.v8f32(<8 x float> %2, <8 x float> %3) ret void } ; CHECK-LABEL: llvm.func @smax_test define void @smax_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { - ; CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + ; CHECK: llvm.intr.smax(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 %5 = call i32 @llvm.smax.i32(i32 %0, i32 %1) - ; CHECK: "llvm.intr.smax"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.smax(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> %6 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %2, <8 x i32> %3) ret void } ; CHECK-LABEL: llvm.func @smin_test define void @smin_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { - ; CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + ; CHECK: llvm.intr.smin(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 %5 = call i32 @llvm.smin.i32(i32 %0, i32 %1) - ; CHECK: "llvm.intr.smin"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.smin(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> %6 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %2, <8 x i32> %3) ret void } ; CHECK-LABEL: llvm.func @umax_test define void @umax_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { - ; CHECK: "llvm.intr.umax"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + ; CHECK: llvm.intr.umax(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 %5 = call i32 @llvm.umax.i32(i32 %0, i32 %1) - ; CHECK: "llvm.intr.umax"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.umax(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> %6 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %2, <8 x i32> %3) ret void } ; CHECK-LABEL: llvm.func @umin_test define void @umin_test(i32 %0, i32 %1, <8 x i32> %2, <8 x i32> %3) { - ; CHECK: 
"llvm.intr.umin"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + ; CHECK: llvm.intr.umin(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 %5 = call i32 @llvm.umin.i32(i32 %0, i32 %1) - ; CHECK: "llvm.intr.umin"(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> + ; CHECK: llvm.intr.umin(%{{.*}}, %{{.*}}) : (vector<8xi32>, vector<8xi32>) -> vector<8xi32> %6 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %2, <8 x i32> %3) ret void } @@ -340,9 +336,9 @@ define void @memcpy_test(i32 %0, i8* %1, i8* %2) { ; CHECK: %[[falseval1:.+]] = llvm.mlir.constant(false) : i1 ; CHECK: %[[constant:.+]] = llvm.mlir.constant(10 : i64) : i64 ; CHECK: %[[falseval2:.+]] = llvm.mlir.constant(false) : i1 - ; CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %[[falseval2]]) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + ; CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %[[falseval1]]) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 %0, i1 false) - ; CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %[[constant]], %[[falseval1]]) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + ; CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %[[constant]], %[[falseval2]]) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %1, i8* %2, i64 10, i1 false) ret void } diff --git a/mlir/test/Target/LLVMIR/Import/zeroinitializer.ll b/mlir/test/Target/LLVMIR/Import/zeroinitializer.ll index 3f582138b03b5..bc0e4cde45175 100644 --- a/mlir/test/Target/LLVMIR/Import/zeroinitializer.ll +++ b/mlir/test/Target/LLVMIR/Import/zeroinitializer.ll @@ -4,10 +4,10 @@ ; CHECK: llvm.mlir.global external @D() ; CHECK-SAME: !llvm.struct<"Domain", (ptr>>, ptr>)> -; CHECK-DAG: %[[E0:.+]] = llvm.mlir.null : !llvm.ptr>>, ptr>)>> -; CHECK-DAG: %[[E1:.+]] = llvm.mlir.null : !llvm.ptr>>, ptr>)>>> ; CHECK: %[[ROOT:.+]] = llvm.mlir.undef : !llvm.struct<"Domain", (ptr>>, ptr>)> -; CHECK: %[[CHAIN:.+]] = llvm.insertvalue %[[E1]], %[[ROOT]][0] -; CHECK: %[[RES:.+]] 
= llvm.insertvalue %[[E0]], %[[CHAIN]][1] +; CHECK: %[[E0:.+]] = llvm.mlir.null : !llvm.ptr>>, ptr>)>>> +; CHECK: %[[CHAIN:.+]] = llvm.insertvalue %[[E0]], %[[ROOT]][0] +; CHECK: %[[E1:.+]] = llvm.mlir.null : !llvm.ptr>>, ptr>)>> +; CHECK: %[[RES:.+]] = llvm.insertvalue %[[E1]], %[[CHAIN]][1] ; CHECK: llvm.return %[[RES]] @D = global %Domain zeroinitializer diff --git a/mlir/test/Target/LLVMIR/arm-neon-2d.mlir b/mlir/test/Target/LLVMIR/arm-neon-2d.mlir index 18d10c0497974..dfb4e4fd309a9 100644 --- a/mlir/test/Target/LLVMIR/arm-neon-2d.mlir +++ b/mlir/test/Target/LLVMIR/arm-neon-2d.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(arm-neon-2d-to-intr)" %s | FileCheck %s +// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(arm-neon-2d-to-intr))" %s | FileCheck %s // CHECK-LABEL: arm_neon_sdot2d_4x4_i8i8 func.func @arm_neon_sdot2d_4x4_i8i8(%a: vector<4xi32>, %b: vector<4x4xi8>, %c: vector<4x4xi8>) -> vector<4xi32> { diff --git a/mlir/test/Target/LLVMIR/llvmir-debug.mlir b/mlir/test/Target/LLVMIR/llvmir-debug.mlir index fd95e176e1941..72f8f746b9769 100644 --- a/mlir/test/Target/LLVMIR/llvmir-debug.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-debug.mlir @@ -27,7 +27,12 @@ llvm.func @func_no_debug() { sourceLanguage = DW_LANG_C, file = #file, producer = "MLIR", isOptimized = true, emissionKind = Full > -#spType = #llvm.di_subroutine_type +#composite = #llvm.di_composite_type< + tag = DW_TAG_structure_type, name = "composite", file = #file, + line = 0, sizeInBits = 0, alignInBits = 0, + elements = #llvm.di_subrange +> +#spType = #llvm.di_subroutine_type #sp = #llvm.di_subprogram< compileUnit = #cu, scope = #file, name = "intrinsics", linkageName = "intrinsics", file = #file, line = 3, scopeLine = 3, subprogramFlags = "Definition|Optimized", type = #spType @@ -69,8 +74,11 @@ llvm.func @func_with_debug(%arg: i64) { // CHECK: ![[FUNC_LOC]] = distinct !DISubprogram(name: "intrinsics", linkageName: "intrinsics", scope: ![[CU_FILE_LOC]], file: 
![[CU_FILE_LOC]], line: 3, type: ![[FUNC_TYPE:.*]], scopeLine: 3, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: ![[CU_LOC]]) // CHECK: ![[FUNC_TYPE]] = !DISubroutineType(cc: DW_CC_normal, types: ![[ARG_TYPES:.*]]) -// CHECK: ![[ARG_TYPES]] = !{![[ARG_TYPE:.*]]} +// CHECK: ![[ARG_TYPES]] = !{![[ARG_TYPE:.*]], ![[COMPOSITE_TYPE:.*]]} // CHECK: ![[ARG_TYPE]] = !DIBasicType(name: "si64", encoding: DW_ATE_signed) +// CHECK: ![[COMPOSITE_TYPE]] = !DICompositeType(tag: DW_TAG_structure_type, name: "composite", file: ![[CU_FILE_LOC]], elements: ![[COMPOSITE_ELEMENTS:.*]]) +// CHECK: ![[COMPOSITE_ELEMENTS]] = !{![[COMPOSITE_ELEMENT:.*]]} +// CHECK: ![[COMPOSITE_ELEMENT]] = !DISubrange(count: 4) // CHECK: ![[VAR_LOC]] = !DILocalVariable(name: "arg", arg: 1, scope: ![[VAR_SCOPE:.*]], file: ![[CU_FILE_LOC]], line: 6, type: ![[ARG_TYPE]]) // CHECK: ![[VAR_SCOPE]] = distinct !DILexicalBlockFile(scope: ![[FUNC_LOC]], file: ![[CU_FILE_LOC]], discriminator: 0) diff --git a/mlir/test/Target/LLVMIR/llvmir-invalid.mlir b/mlir/test/Target/LLVMIR/llvmir-invalid.mlir index 7217979dbf005..4627df83eb425 100644 --- a/mlir/test/Target/LLVMIR/llvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-invalid.mlir @@ -83,6 +83,20 @@ llvm.func @invalid_align(%arg0 : f32 {llvm.align = 4}) -> f32 { // ----- +// expected-error @below{{llvm.signext attribute attached to LLVM non-integer argument}} +llvm.func @invalid_signext(%arg0: f32 {llvm.signext}) { + "llvm.return"() : () -> () +} + +// ----- + +// expected-error @below{{llvm.zeroext attribute attached to LLVM non-integer argument}} +llvm.func @invalid_zeroext(%arg0: f32 {llvm.zeroext}) { + "llvm.return"() : () -> () +} + +// ----- + llvm.func @no_non_complex_struct() -> !llvm.array<2 x array<2 x array<2 x struct<(i32)>>>> { // expected-error @below{{expected struct type to be a complex number}} %0 = llvm.mlir.constant(dense<[[[1, 2], [3, 4]], [[42, 43], [44, 45]]]> : tensor<2x2x2xi32>) : !llvm.array<2 x array<2 x array<2 x 
struct<(i32)>>>> diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index bc9c287fa9a81..0ca8bb02c2bb1 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -1049,31 +1049,81 @@ llvm.func @llvm_noalias(%arg0: !llvm.ptr {llvm.noalias}) { llvm.return } +// CHECK-LABEL: declare void @llvm_noalias_decl(ptr noalias) +llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + +// CHECK-LABEL: define void @byrefattr(ptr byref(i32) % +llvm.func @byrefattr(%arg0: !llvm.ptr {llvm.byref = i32}) { + llvm.return +} + +// CHECK-LABEL: declare void @byrefattr_decl(ptr byref(i32)) +llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + // CHECK-LABEL: define void @byvalattr(ptr byval(i32) % llvm.func @byvalattr(%arg0: !llvm.ptr {llvm.byval = i32}) { llvm.return } +// CHECK-LABEL: declare void @byvalattr_decl(ptr byval(i32)) +llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + // CHECK-LABEL: define void @sretattr(ptr sret(i32) % llvm.func @sretattr(%arg0: !llvm.ptr {llvm.sret = i32}) { llvm.return } +// CHECK-LABEL: declare void @sretattr_decl(ptr sret(i32)) +llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + // CHECK-LABEL: define void @nestattr(ptr nest % llvm.func @nestattr(%arg0: !llvm.ptr {llvm.nest}) { llvm.return } +// CHECK-LABEL: declare void @nestattr_decl(ptr nest) +llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + // CHECK-LABEL: define void @noundefattr(i32 noundef % llvm.func @noundefattr(%arg0: i32 {llvm.noundef}) { llvm.return } +// CHECK-LABEL: declare void @noundefattr_decl(i32 noundef) +llvm.func @noundefattr_decl(i32 {llvm.noundef}) + // CHECK-LABEL: define void @llvm_align(ptr align 4 {{%*.}}) llvm.func @llvm_align(%arg0: !llvm.ptr {llvm.align = 4}) { llvm.return } +// CHECK-LABEL: declare void @llvm_align_decl(ptr align 4) +llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4}) + +// CHECK-LABEL: define void @inallocaattr(ptr inalloca(i32) % +llvm.func @inallocaattr(%arg0: 
!llvm.ptr {llvm.inalloca = i32}) { + llvm.return +} + +// CHECK-LABEL: declare void @inallocaattr_decl(ptr inalloca(i32)) +llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + +// CHECK-LABEL: define void @signextattr(i1 signext % +llvm.func @signextattr(%arg0: i1 {llvm.signext}) { + llvm.return +} + +// CHECK-LABEL: declare void @signextattr_decl(i1 signext) +llvm.func @signextattr_decl(i1 {llvm.signext}) + +// CHECK-LABEL: define void @zeroextattr(i1 zeroext % +llvm.func @zeroextattr(%arg0: i1 {llvm.zeroext}) { + llvm.return +} + +// CHECK-LABEL: declare void @zeroextattr_decl(i1 zeroext) +llvm.func @zeroextattr_decl(i1 {llvm.zeroext}) + // CHECK-LABEL: @llvm_varargs(...) llvm.func @llvm_varargs(...) @@ -1680,6 +1730,17 @@ llvm.func @fastmathFlags(%arg0: f32) { %14 = llvm.call @fastmathFlagsFunc(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> (f32) %15 = llvm.call @fastmathFlagsFunc(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> (f32) %16 = llvm.call @fastmathFlagsFunc(%arg0) {fastmathFlags = #llvm.fastmath} : (f32) -> (f32) + +// CHECK: call fast float @llvm.copysign.f32(float {{.*}}, float {{.*}}) + %17 = "llvm.intr.copysign"(%arg0, %arg0) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 +// CHECK: call afn float @llvm.copysign.f32(float {{.*}}, float {{.*}}) + %18 = "llvm.intr.copysign"(%arg0, %arg0) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + +// CHECK: call fast float @llvm.powi.f32.i32(float {{.*}}, i32 {{.*}}) + %exp = llvm.mlir.constant(1 : i32) : i32 + %19 = "llvm.intr.powi"(%arg0, %exp) {fastmathFlags = #llvm.fastmath} : (f32, i32) -> f32 +// CHECK: call afn float @llvm.powi.f32.i32(float {{.*}}, i32 {{.*}}) + %20 = "llvm.intr.powi"(%arg0, %exp) {fastmathFlags = #llvm.fastmath} : (f32, i32) -> f32 llvm.return } @@ -1948,5 +2009,5 @@ llvm.func @vararg_function(%arg0: i32, ...) 
{ // Function attributes: readnone // CHECK: declare void @readnone_function() #[[ATTR:[0-9]+]] -// CHECK: attributes #[[ATTR]] = { readnone } +// CHECK: attributes #[[ATTR]] = { memory(none) } llvm.func @readnone_function() attributes {llvm.readnone} diff --git a/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir b/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir index 924842d6388dc..9f3eeb569060c 100644 --- a/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir +++ b/mlir/test/Target/LLVMIR/vector-to-llvm-ir.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" | mlir-translate -mlir-to-llvmir | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-translate -mlir-to-llvmir | FileCheck %s func.func @genbool_1d() -> vector<8xi1> { %0 = vector.constant_mask [4] : vector<8xi1> diff --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir index 3a19cac5d3070..d33c911e042d7 100644 --- a/mlir/test/Transforms/canonicalize-block-merge.mlir +++ b/mlir/test/Transforms/canonicalize-block-merge.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file | FileCheck %s // Check the simple case of single operation blocks with a return. 
diff --git a/mlir/test/Transforms/canonicalize-dce.mlir b/mlir/test/Transforms/canonicalize-dce.mlir index d118768764376..46545d2e9fd51 100644 --- a/mlir/test/Transforms/canonicalize-dce.mlir +++ b/mlir/test/Transforms/canonicalize-dce.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='func.func(canonicalize)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s // Test case: Simple case of deleting a dead pure op. diff --git a/mlir/test/Transforms/canonicalize-td.mlir b/mlir/test/Transforms/canonicalize-td.mlir index 549b302534914..46fc4d085ba56 100644 --- a/mlir/test/Transforms/canonicalize-td.mlir +++ b/mlir/test/Transforms/canonicalize-td.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize{top-down=true})' | FileCheck %s --check-prefix=TD -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s --check-prefix=BU +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize{top-down=true}))' | FileCheck %s --check-prefix=TD +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s --check-prefix=BU // BU-LABEL: func @default_insertion_position diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 20538cd6262d3..df1555db666bb 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(canonicalize)' -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(canonicalize))' -split-input-file | FileCheck %s // CHECK-LABEL: func @test_subi_zero func.func @test_subi_zero(%arg0: 
i32) -> i32 { diff --git a/mlir/test/Transforms/cse.mlir b/mlir/test/Transforms/cse.mlir index 7a8de218bdd82..dbc2d5efb36ad 100644 --- a/mlir/test/Transforms/cse.mlir +++ b/mlir/test/Transforms/cse.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(cse)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(cse))' | FileCheck %s // CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0) -> (d0 mod 2)> #map0 = affine_map<(d0) -> (d0 mod 2)> diff --git a/mlir/test/Transforms/loop-fusion-2.mlir b/mlir/test/Transforms/loop-fusion-2.mlir index 729e1dc2d9e80..c1fded7a16bb9 100644 --- a/mlir/test/Transforms/loop-fusion-2.mlir +++ b/mlir/test/Transforms/loop-fusion-2.mlir @@ -508,16 +508,16 @@ func.func @fuse_across_dim_mismatch(%arg0: memref<4x4x16x1xf32>, %arg1: memref<1 } return } -// MAXIMAL: #map = affine_map<(d0, d1) -> (d0 * 16 + d1)> +// MAXIMAL: #[[$MAP:.*]] = affine_map<(d0, d1) -> (d0 * 16 + d1)> // MAXIMAL-LABEL: func @fuse_across_dim_mismatch // MAXIMAL: memref.alloc() : memref<1x1xf32> // MAXIMAL: affine.for %{{.*}} = 0 to 9 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 9 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 4 { // MAXIMAL-NEXT: affine.for %{{.*}} = 0 to 16 { -// MAXIMAL-NEXT: affine.apply #map(%{{.*}}, %{{.*}}) +// MAXIMAL-NEXT: affine.apply #[[$MAP]](%{{.*}}, %{{.*}}) // MAXIMAL-NEXT: affine.store %{{.*}}, %{{.*}}[0, 0] : memref<1x1xf32> -// MAXIMAL-NEXT: affine.apply #map(%{{.*}}, %{{.*}}) +// MAXIMAL-NEXT: affine.apply #[[$MAP]](%{{.*}}, %{{.*}}) // MAXIMAL-NEXT: affine.load %{{.*}}[0, 0] : memref<1x1xf32> // MAXIMAL-NEXT: } // MAXIMAL-NEXT: } diff --git a/mlir/test/Transforms/normalize-memrefs-ops.mlir b/mlir/test/Transforms/normalize-memrefs-ops.mlir index b45b62a92e4a6..34420c50a51ab 100644 --- a/mlir/test/Transforms/normalize-memrefs-ops.mlir +++ b/mlir/test/Transforms/normalize-memrefs-ops.mlir @@ -29,15 +29,15 @@ func.func @test_norm(%arg0 : 
memref<1x16x14x14xf32, #map0>) -> () { // Same test with op_nonnorm, with maps in the arguments and the operations in the function. // CHECK-LABEL: test_nonnorm -// CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x14x14xf32, #map>) +// CHECK-SAME: (%[[ARG0:.*]]: memref<1x16x14x14xf32, #[[MAP:.*]]>) func.func @test_nonnorm(%arg0 : memref<1x16x14x14xf32, #map0>) -> () { %0 = memref.alloc() : memref<1x16x14x14xf32, #map0> "test.op_nonnorm"(%arg0, %0) : (memref<1x16x14x14xf32, #map0>, memref<1x16x14x14xf32, #map0>) -> () memref.dealloc %0 : memref<1x16x14x14xf32, #map0> - // CHECK: %[[v0:.*]] = memref.alloc() : memref<1x16x14x14xf32, #map> - // CHECK: "test.op_nonnorm"(%[[ARG0]], %[[v0]]) : (memref<1x16x14x14xf32, #map>, memref<1x16x14x14xf32, #map>) -> () - // CHECK: memref.dealloc %[[v0]] : memref<1x16x14x14xf32, #map> + // CHECK: %[[v0:.*]] = memref.alloc() : memref<1x16x14x14xf32, #[[MAP]]> + // CHECK: "test.op_nonnorm"(%[[ARG0]], %[[v0]]) : (memref<1x16x14x14xf32, #[[MAP]]>, memref<1x16x14x14xf32, #[[MAP]]>) -> () + // CHECK: memref.dealloc %[[v0]] : memref<1x16x14x14xf32, #[[MAP]]> return } diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir index 224ed2d2f39de..90c3f5d28d6af 100644 --- a/mlir/test/Transforms/parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s // CHECK-LABEL: func @parallel_many_dims() { func.func @parallel_many_dims() { diff --git a/mlir/test/Transforms/parametric-mapping.mlir 
b/mlir/test/Transforms/parametric-mapping.mlir index a2e7a7718285b..b6ef0088d868b 100644 --- a/mlir/test/Transforms/parametric-mapping.mlir +++ b/mlir/test/Transforms/parametric-mapping.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline="func.func(test-mapping-to-processing-elements)" %s | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline="builtin.module(func.func(test-mapping-to-processing-elements))" %s | FileCheck %s // CHECK: #[[mul_map:.+]] = affine_map<()[s0, s1] -> (s0 * s1)> // CHECK: #[[add_map:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> diff --git a/mlir/test/Transforms/sccp-callgraph.mlir b/mlir/test/Transforms/sccp-callgraph.mlir index 3ed1c3c131358..f31f749b6e75a 100644 --- a/mlir/test/Transforms/sccp-callgraph.mlir +++ b/mlir/test/Transforms/sccp-callgraph.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(sccp)" -split-input-file | FileCheck %s --check-prefix=FUNC +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(builtin.module(sccp))" -split-input-file | FileCheck %s --check-prefix=NESTED +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(sccp))" -split-input-file | FileCheck %s --check-prefix=FUNC /// Check that a constant is properly propagated through the arguments and /// results of a private function. 
diff --git a/mlir/test/Transforms/sccp-structured.mlir b/mlir/test/Transforms/sccp-structured.mlir index 529d41554a473..f17f5ff51cf3f 100644 --- a/mlir/test/Transforms/sccp-structured.mlir +++ b/mlir/test/Transforms/sccp-structured.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(sccp)" -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(sccp))" -split-input-file | FileCheck %s /// Check that a constant is properly propagated when only one edge is taken. diff --git a/mlir/test/Transforms/sccp.mlir b/mlir/test/Transforms/sccp.mlir index d4ca3a2b492a3..db24432b65cc6 100644 --- a/mlir/test/Transforms/sccp.mlir +++ b/mlir/test/Transforms/sccp.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="func.func(sccp)" -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(sccp))" -split-input-file | FileCheck %s /// Check simple forward constant propagation without any control flow. 
diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir index 5645f8b779183..91cab126d8dd6 100644 --- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize)' | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-collapsing{collapsed-indices-0=0,1}, canonicalize))' | FileCheck %s func.func @collapse_to_single() { %c0 = arith.constant 3 : index diff --git a/mlir/test/Transforms/test-canonicalize-filter.mlir b/mlir/test/Transforms/test-canonicalize-filter.mlir index 5e3738e531b0a..dba5f05e84345 100644 --- a/mlir/test/Transforms/test-canonicalize-filter.mlir +++ b/mlir/test/Transforms/test-canonicalize-filter.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s --check-prefix=NO_FILTER -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize{enable-patterns=TestRemoveOpWithInnerOps})' | FileCheck %s --check-prefix=FILTER_ENABLE -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize{disable-patterns=TestRemoveOpWithInnerOps})' | FileCheck %s --check-prefix=FILTER_DISABLE +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s --check-prefix=NO_FILTER +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize{enable-patterns=TestRemoveOpWithInnerOps}))' | FileCheck %s --check-prefix=FILTER_ENABLE +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize{disable-patterns=TestRemoveOpWithInnerOps}))' | FileCheck %s --check-prefix=FILTER_DISABLE // NO_FILTER-LABEL: func @remove_op_with_inner_ops_pattern // NO_FILTER-NEXT: return diff --git a/mlir/test/Transforms/test-canonicalize.mlir 
b/mlir/test/Transforms/test-canonicalize.mlir index 2181d1856d3aa..bc463fefe6534 100644 --- a/mlir/test/Transforms/test-canonicalize.mlir +++ b/mlir/test/Transforms/test-canonicalize.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize)' | FileCheck %s -// RUN: mlir-opt %s -pass-pipeline='func.func(canonicalize{region-simplify=false})' | FileCheck %s --check-prefixes=CHECK,NO-RS +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize))' | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(canonicalize{region-simplify=false}))' | FileCheck %s --check-prefixes=CHECK,NO-RS // CHECK-LABEL: func @remove_op_with_inner_ops_pattern func.func @remove_op_with_inner_ops_pattern() { diff --git a/mlir/test/Transforms/test-operation-folder-commutative.mlir b/mlir/test/Transforms/test-operation-folder-commutative.mlir index ea8f9b3f42928..89896e3bf99a8 100644 --- a/mlir/test/Transforms/test-operation-folder-commutative.mlir +++ b/mlir/test/Transforms/test-operation-folder-commutative.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --pass-pipeline="func.func(test-patterns)" %s | FileCheck %s +// RUN: mlir-opt --pass-pipeline="builtin.module(func.func(test-patterns))" %s | FileCheck %s // CHECK-LABEL: func @test_reorder_constants_and_match func.func @test_reorder_constants_and_match(%arg0 : i32) -> (i32) { diff --git a/mlir/test/Transforms/test-symbol-dce.mlir b/mlir/test/Transforms/test-symbol-dce.mlir index fa6da2202fc3d..7bd784928e6f3 100644 --- a/mlir/test/Transforms/test-symbol-dce.mlir +++ b/mlir/test/Transforms/test-symbol-dce.mlir @@ -1,5 +1,5 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -symbol-dce -split-input-file -verify-diagnostics | FileCheck %s -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(symbol-dce)" -split-input-file | FileCheck %s --check-prefix=NESTED +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(builtin.module(symbol-dce))" 
-split-input-file | FileCheck %s --check-prefix=NESTED // Check that trivially dead and trivially live non-nested cases are handled. diff --git a/mlir/test/lib/Analysis/TestAliasAnalysis.cpp b/mlir/test/lib/Analysis/TestAliasAnalysis.cpp index 284ea4cffeca4..04b2bc3906a8a 100644 --- a/mlir/test/lib/Analysis/TestAliasAnalysis.cpp +++ b/mlir/test/lib/Analysis/TestAliasAnalysis.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "TestAliasAnalysis.h" #include "mlir/Analysis/AliasAnalysis.h" #include "mlir/Pass/Pass.h" @@ -39,13 +40,80 @@ static void printAliasOperand(Value value) { llvm::errs() << "#" << result.getResultNumber(); } +namespace mlir { +namespace test { +void printAliasResult(AliasResult result, Value lhs, Value rhs) { + printAliasOperand(lhs); + llvm::errs() << " <-> "; + printAliasOperand(rhs); + llvm::errs() << ": " << result << "\n"; +} + +/// Print the result of an alias query. +void printModRefResult(ModRefResult result, Operation *op, Value location) { + printAliasOperand(op); + llvm::errs() << " -> "; + printAliasOperand(location); + llvm::errs() << ": " << result << "\n"; +} + +void TestAliasAnalysisBase::runAliasAnalysisOnOperation( + Operation *op, AliasAnalysis &aliasAnalysis) { + llvm::errs() << "Testing : " << op->getAttr("sym_name") << "\n"; + + // Collect all of the values to check for aliasing behavior. + SmallVector valsToCheck; + op->walk([&](Operation *op) { + if (!op->getAttr("test.ptr")) + return; + valsToCheck.append(op->result_begin(), op->result_end()); + for (Region ®ion : op->getRegions()) + for (Block &block : region) + valsToCheck.append(block.args_begin(), block.args_end()); + }); + + // Check for aliasing behavior between each of the values. 
+ for (auto it = valsToCheck.begin(), e = valsToCheck.end(); it != e; ++it) + for (auto *innerIt = valsToCheck.begin(); innerIt != it; ++innerIt) + printAliasResult(aliasAnalysis.alias(*innerIt, *it), *innerIt, *it); +} + +void TestAliasAnalysisModRefBase::runAliasAnalysisOnOperation( + Operation *op, AliasAnalysis &aliasAnalysis) { + llvm::errs() << "Testing : " << op->getAttr("sym_name") << "\n"; + + // Collect all of the values to check for aliasing behavior. + SmallVector valsToCheck; + op->walk([&](Operation *op) { + if (!op->getAttr("test.ptr")) + return; + valsToCheck.append(op->result_begin(), op->result_end()); + for (Region ®ion : op->getRegions()) + for (Block &block : region) + valsToCheck.append(block.args_begin(), block.args_end()); + }); + + // Check for aliasing behavior between each of the values. + for (auto &it : valsToCheck) { + op->walk([&](Operation *op) { + if (!op->getAttr("test.ptr")) + return; + printModRefResult(aliasAnalysis.getModRef(op, it), op, it); + }); + } +} + +} // namespace test +} // namespace mlir + //===----------------------------------------------------------------------===// // Testing AliasResult //===----------------------------------------------------------------------===// namespace { struct TestAliasAnalysisPass - : public PassWrapper> { + : public test::TestAliasAnalysisBase, + PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAliasAnalysisPass) StringRef getArgument() const final { return "test-alias-analysis"; } @@ -53,32 +121,8 @@ struct TestAliasAnalysisPass return "Test alias analysis results."; } void runOnOperation() override { - llvm::errs() << "Testing : " << getOperation()->getAttr("sym_name") << "\n"; - - // Collect all of the values to check for aliasing behavior. 
AliasAnalysis &aliasAnalysis = getAnalysis(); - SmallVector valsToCheck; - getOperation()->walk([&](Operation *op) { - if (!op->getAttr("test.ptr")) - return; - valsToCheck.append(op->result_begin(), op->result_end()); - for (Region ®ion : op->getRegions()) - for (Block &block : region) - valsToCheck.append(block.args_begin(), block.args_end()); - }); - - // Check for aliasing behavior between each of the values. - for (auto it = valsToCheck.begin(), e = valsToCheck.end(); it != e; ++it) - for (auto *innerIt = valsToCheck.begin(); innerIt != it; ++innerIt) - printAliasResult(aliasAnalysis.alias(*innerIt, *it), *innerIt, *it); - } - - /// Print the result of an alias query. - void printAliasResult(AliasResult result, Value lhs, Value rhs) { - printAliasOperand(lhs); - llvm::errs() << " <-> "; - printAliasOperand(rhs); - llvm::errs() << ": " << result << "\n"; + runAliasAnalysisOnOperation(getOperation(), aliasAnalysis); } }; } // namespace @@ -89,7 +133,8 @@ struct TestAliasAnalysisPass namespace { struct TestAliasAnalysisModRefPass - : public PassWrapper> { + : public test::TestAliasAnalysisModRefBase, + PassWrapper> { MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestAliasAnalysisModRefPass) StringRef getArgument() const final { return "test-alias-analysis-modref"; } @@ -97,36 +142,8 @@ struct TestAliasAnalysisModRefPass return "Test alias analysis ModRef results."; } void runOnOperation() override { - llvm::errs() << "Testing : " << getOperation()->getAttr("sym_name") << "\n"; - - // Collect all of the values to check for aliasing behavior. AliasAnalysis &aliasAnalysis = getAnalysis(); - SmallVector valsToCheck; - getOperation()->walk([&](Operation *op) { - if (!op->getAttr("test.ptr")) - return; - valsToCheck.append(op->result_begin(), op->result_end()); - for (Region ®ion : op->getRegions()) - for (Block &block : region) - valsToCheck.append(block.args_begin(), block.args_end()); - }); - - // Check for aliasing behavior between each of the values. 
- for (auto &it : valsToCheck) { - getOperation()->walk([&](Operation *op) { - if (!op->getAttr("test.ptr")) - return; - printModRefResult(aliasAnalysis.getModRef(op, it), op, it); - }); - } - } - - /// Print the result of an alias query. - void printModRefResult(ModRefResult result, Operation *op, Value location) { - printAliasOperand(op); - llvm::errs() << " -> "; - printAliasOperand(location); - llvm::errs() << ": " << result << "\n"; + runAliasAnalysisOnOperation(getOperation(), aliasAnalysis); } }; } // namespace diff --git a/mlir/test/lib/Analysis/TestAliasAnalysis.h b/mlir/test/lib/Analysis/TestAliasAnalysis.h new file mode 100644 index 0000000000000..f84b2fabb092a --- /dev/null +++ b/mlir/test/lib/Analysis/TestAliasAnalysis.h @@ -0,0 +1,37 @@ +//===- TestAliasAnalysis.h - MLIR Test Utility ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a common facility that can be reused for the +// testing of various aliasing analyses +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TEST_LIB_ANALYSIS_ALIASANALYSIS_H +#define MLIR_TEST_LIB_ANALYSIS_ALIASANALYSIS_H + +#include "mlir/Analysis/AliasAnalysis.h" + +namespace mlir { +namespace test { + +/// Print the result of an alias query. 
+void printAliasResult(AliasResult result, Value lhs, Value rhs); +void printModRefResult(ModRefResult result, Operation *op, Value location); + +struct TestAliasAnalysisBase { + void runAliasAnalysisOnOperation(Operation *op, AliasAnalysis &aliasAnalysis); +}; + +struct TestAliasAnalysisModRefBase { + void runAliasAnalysisOnOperation(Operation *op, AliasAnalysis &aliasAnalysis); +}; + +} // namespace test +} // namespace mlir + +#endif // MLIR_TEST_LIB_ANALYSIS_ALIASANALYSIS_H diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td index 07cfca121f62d..0c35f81c129b0 100644 --- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td +++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td @@ -119,8 +119,7 @@ def TestI64ElementsAttr : Test_Attr<"TestI64Elements", [ } def TestSubElementsAccessAttr : Test_Attr<"TestSubElementsAccess", [ - DeclareAttrInterfaceMethods + SubElementAttrInterface ]> { let mnemonic = "sub_elements_access"; diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp index 28fde0987ac09..4c7639b3ae252 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp +++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp @@ -150,20 +150,6 @@ void TestSubElementsAccessAttr::print(::mlir::AsmPrinter &printer) const { << ">"; } -void TestSubElementsAccessAttr::walkImmediateSubElements( - llvm::function_ref walkAttrsFn, - llvm::function_ref walkTypesFn) const { - walkAttrsFn(getFirst()); - walkAttrsFn(getSecond()); - walkAttrsFn(getThird()); -} - -Attribute TestSubElementsAccessAttr::replaceImmediateSubElements( - ArrayRef replAttrs, ArrayRef replTypes) const { - assert(replAttrs.size() == 3 && "invalid number of replacement attributes"); - return get(getContext(), replAttrs[0], replAttrs[1], replAttrs[2]); -} - //===----------------------------------------------------------------------===// // TestExtern1DI64ElementsAttr 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 17c8c1f84d35d..12f374777936c 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -786,8 +786,8 @@ struct TestLegalizePatternDriver TestNestedOpCreationUndoRewrite, TestReplaceEraseOp, TestCreateUnregisteredOp>(&getContext()); patterns.add(&getContext(), converter); - mlir::populateFunctionOpInterfaceTypeConversionPattern( - patterns, converter); + mlir::populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, + converter); mlir::populateCallOpTypeConversionPattern(patterns, converter); // Define the conversion target used for the test. @@ -1313,8 +1313,8 @@ struct TestTypeConversionDriver TestTestSignatureConversionNoConverter>(converter, &getContext()); patterns.add(&getContext()); - mlir::populateFunctionOpInterfaceTypeConversionPattern( - patterns, converter); + mlir::populateAnyFunctionOpInterfaceTypeConversionPattern(patterns, + converter); if (failed(applyPartialConversion(getOperation(), target, std::move(patterns)))) diff --git a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterface.cpp b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterface.cpp index 31e3c1a529a7c..1644179c427c3 100644 --- a/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterface.cpp +++ b/mlir/test/lib/Interfaces/TilingInterface/TestTilingInterface.cpp @@ -193,7 +193,8 @@ struct TestTileUsingSCFForOp rewriter.eraseOp(op); } - filter.replaceLinalgTransformationFilter(rewriter, tilingResult->tiledOp); + for (auto tiledOp : tilingResult->tiledOps) + filter.replaceLinalgTransformationFilter(rewriter, tiledOp); return success(); } diff --git a/mlir/test/mlir-cpu-runner/async-error.mlir b/mlir/test/mlir-cpu-runner/async-error.mlir index 321f243a3debe..a1ca96fae105e 100644 --- a/mlir/test/mlir-cpu-runner/async-error.mlir +++ 
b/mlir/test/mlir-cpu-runner/async-error.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/async-func.mlir b/mlir/test/mlir-cpu-runner/async-func.mlir new file mode 100644 index 0000000000000..8b3d728d4667f --- /dev/null +++ b/mlir/test/mlir-cpu-runner/async-func.mlir @@ -0,0 +1,149 @@ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-vector-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ +// RUN: | mlir-cpu-runner \ +// RUN: -e main -entry-point-result=void -O0 \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext \ +// RUN: -shared-libs=%mlir_lib_dir/libmlir_async_runtime%shlibext \ +// RUN: | FileCheck %s --dump-input=always + +// FIXME: https://github.com/llvm/llvm-project/issues/57231 +// UNSUPPORTED: hwasan + +async.func @async_func_empty() -> !async.token { + return +} + +async.func @async_func_assert() -> 
!async.token { + %false = arith.constant 0 : i1 + cf.assert %false, "error" + return +} + +async.func @async_func_nested_assert() -> !async.token { + %token0 = async.call @async_func_assert() : () -> !async.token + async.await %token0 : !async.token + return +} + +async.func @async_func_value_assert() -> !async.value { + %false = arith.constant 0 : i1 + cf.assert %false, "error" + %0 = arith.constant 123.45 : f32 + return %0 : f32 +} + +async.func @async_func_value_nested_assert() -> !async.value { + %value0 = async.call @async_func_value_assert() : () -> !async.value + %ret = async.await %value0 : !async.value + return %ret : f32 +} + +async.func @async_func_return_value() -> !async.value { + %0 = arith.constant 456.789 : f32 + return %0 : f32 +} + +async.func @async_func_non_blocking_await() -> !async.value { + %value0 = async.call @async_func_return_value() : () -> !async.value + %1 = async.await %value0 : !async.value + return %1 : f32 +} + +async.func @async_func_inside_memref() -> !async.value> { + %0 = memref.alloc() : memref + %c0 = arith.constant 0.25 : f32 + memref.store %c0, %0[] : memref + return %0 : memref +} + +async.func @async_func_passed_memref(%arg0 : !async.value>) -> !async.token { + %unwrapped = async.await %arg0 : !async.value> + %0 = memref.load %unwrapped[] : memref + %1 = arith.addf %0, %0 : f32 + memref.store %1, %unwrapped[] : memref + return +} + + +func.func @main() { + %false = arith.constant 0 : i1 + + // ------------------------------------------------------------------------ // + // Check that simple async.func completes without errors. 
+ // ------------------------------------------------------------------------ // + %token0 = async.call @async_func_empty() : () -> !async.token + async.runtime.await %token0 : !async.token + + // CHECK: 0 + %err0 = async.runtime.is_error %token0 : !async.token + vector.print %err0 : i1 + + // ------------------------------------------------------------------------ // + // Check that assertion in the async.func converted to async error. + // ------------------------------------------------------------------------ // + %token1 = async.call @async_func_assert() : () -> !async.token + async.runtime.await %token1 : !async.token + + // CHECK: 1 + %err1 = async.runtime.is_error %token1 : !async.token + vector.print %err1 : i1 + + // ------------------------------------------------------------------------ // + // Check error propagation from the nested async.func. + // ------------------------------------------------------------------------ // + %token2 = async.call @async_func_nested_assert() : () -> !async.token + async.runtime.await %token2 : !async.token + + // CHECK: 1 + %err2 = async.runtime.is_error %token2 : !async.token + vector.print %err2 : i1 + + // ------------------------------------------------------------------------ // + // Check error propagation from the nested async.func with async values. 
+ // ------------------------------------------------------------------------ // + %value3 = async.call @async_func_value_nested_assert() : () -> !async.value + async.runtime.await %value3 : !async.value + + // CHECK: 1 + %err3_0 = async.runtime.is_error %value3 : !async.value + vector.print %err3_0 : i1 + + // ------------------------------------------------------------------------ // + // Non-blocking async.await inside the async.func + // ------------------------------------------------------------------------ // + %result0 = async.call @async_func_non_blocking_await() : () -> !async.value + %4 = async.await %result0 : !async.value + + // CHECK: 456.789 + vector.print %4 : f32 + + // ------------------------------------------------------------------------ // + // Memref allocated inside async.func. + // ------------------------------------------------------------------------ // + %result1 = async.call @async_func_inside_memref() : () -> !async.value> + %5 = async.await %result1 : !async.value> + %6 = memref.cast %5 : memref to memref<*xf32> + + // CHECK: Unranked Memref + // CHECK-SAME: rank = 0 offset = 0 sizes = [] strides = [] + // CHECK-NEXT: [0.25] + call @printMemrefF32(%6) : (memref<*xf32>) -> () + + // ------------------------------------------------------------------------ // + // Memref passed as async.func parameter + // ------------------------------------------------------------------------ // + %token3 = async.call @async_func_passed_memref(%result1) : (!async.value>) -> !async.token + async.await %token3 : !async.token + + // CHECK: Unranked Memref + // CHECK-SAME: rank = 0 offset = 0 sizes = [] strides = [] + // CHECK-NEXT: [0.5] + call @printMemrefF32(%6) : (memref<*xf32>) -> () + + memref.dealloc %5 : memref + + return +} + +func.func private @printMemrefF32(memref<*xf32>) + attributes { llvm.emit_c_interface } diff --git a/mlir/test/mlir-cpu-runner/async-group.mlir b/mlir/test/mlir-cpu-runner/async-group.mlir index ef6c69ee19e2f..08c546b06bc81 
100644 --- a/mlir/test/mlir-cpu-runner/async-group.mlir +++ b/mlir/test/mlir-cpu-runner/async-group.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/async-value.mlir b/mlir/test/mlir-cpu-runner/async-value.mlir index 73627fe314e2f..cafdc9f57a7aa 100644 --- a/mlir/test/mlir-cpu-runner/async-value.mlir +++ b/mlir/test/mlir-cpu-runner/async-value.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir index 272fa04904858..85e260f504f91 100644 --- a/mlir/test/mlir-cpu-runner/async.mlir +++ b/mlir/test/mlir-cpu-runner/async.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir index 016adcf43d732..3720402f8caec 100644 --- a/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir +++ b/mlir/test/mlir-cpu-runner/bare-ptr-call-conv.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1}" -reconcile-unrealized-casts | mlir-cpu-runner -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm{use-bare-ptr-memref-call-conv=1},reconcile-unrealized-casts)" | mlir-cpu-runner -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext -entry-point-result=void | FileCheck %s // Verify bare pointer memref calling convention. 
`simple_add1_add2_test` // gets two 2xf32 memrefs, adds 1.0f to the first one and 2.0f to the second diff --git a/mlir/test/mlir-cpu-runner/copy.mlir b/mlir/test/mlir-cpu-runner/copy.mlir index 2c62bf5324c88..d1769dccef3ef 100644 --- a/mlir/test/mlir-cpu-runner/copy.mlir +++ b/mlir/test/mlir-cpu-runner/copy.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-scf-to-cf,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/global-memref.mlir b/mlir/test/mlir-cpu-runner/global-memref.mlir index 7ebd24177ca1c..937d8d3afb248 100644 --- a/mlir/test/mlir-cpu-runner/global-memref.mlir +++ b/mlir/test/mlir-cpu-runner/global-memref.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e main -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s func.func private @printMemrefF32(memref<*xf32>) attributes { llvm.emit_c_interface } func.func private @printMemrefI32(memref<*xi32>) attributes { llvm.emit_c_interface } diff --git 
a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir index cbaeaed54cdaa..18c1b78e1c417 100644 --- a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir +++ b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(test-math-polynomial-approximation,convert-arith-to-llvm),convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(test-math-polynomial-approximation,convert-arith-to-llvm),convert-vector-to-llvm,func.func(convert-math-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner \ // RUN: -e main -entry-point-result=void -O0 \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ diff --git a/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir b/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir index 4895e303a2d30..134a1293d28cd 100644 --- a/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir +++ b/mlir/test/mlir-cpu-runner/memref-reinterpret-cast.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf),convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-scf-to-cf),convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/memref-reshape.mlir b/mlir/test/mlir-cpu-runner/memref-reshape.mlir index 05eb536decd79..47ee21a7784aa 100644 --- a/mlir/test/mlir-cpu-runner/memref-reshape.mlir +++ b/mlir/test/mlir-cpu-runner/memref-reshape.mlir @@ -1,4 
+1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-scf-to-cf,memref-expand,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-scf-to-cf,memref-expand,convert-arith-to-llvm),convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/print.mlir b/mlir/test/mlir-cpu-runner/print.mlir index dec09521d3f81..039b32d64c727 100644 --- a/mlir/test/mlir-cpu-runner/print.mlir +++ b/mlir/test/mlir-cpu-runner/print.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext \ // RUN: | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir index da40d5fdb5165..5ebafbe668020 100644 --- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir +++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -pass-pipeline="func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s +// RUN: mlir-opt 
-pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,lower-affine,convert-scf-to-cf,convert-arith-to-llvm),convert-vector-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" %s | mlir-cpu-runner -O3 -e main -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s func.func @main() { %A = memref.alloc() : memref<16x16xf32> diff --git a/mlir/test/mlir-cpu-runner/unranked-memref.mlir b/mlir/test/mlir-cpu-runner/unranked-memref.mlir index 6f552c96bfe8a..f8a37f12c051c 100644 --- a/mlir/test/mlir-cpu-runner/unranked-memref.mlir +++ b/mlir/test/mlir-cpu-runner/unranked-memref.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | \ // RUN: mlir-cpu-runner -e main -entry-point-result=void \ // RUN: -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext,%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir index 3a9cf9332bf60..565495b802f2b 100644 --- a/mlir/test/mlir-cpu-runner/utils.mlir +++ b/mlir/test/mlir-cpu-runner/utils.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D -// RUN: mlir-opt %s 
-pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_1d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D -// RUN: mlir-opt %s -pass-pipeline="func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts" | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e print_0d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-0D +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e print_1d -entry-point-result=void 
-shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-1D +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e print_3d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-3D +// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-linalg-to-loops,convert-scf-to-cf,convert-arith-to-llvm),convert-linalg-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)" | mlir-cpu-runner -e vector_splat_2d -entry-point-result=void -shared-libs=%mlir_lib_dir/libmlir_runner_utils%shlibext -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | FileCheck %s --check-prefix=PRINT-VECTOR-SPLAT-2D func.func @print_0d() { %f = arith.constant 2.00000e+00 : f32 diff --git a/mlir/test/mlir-opt/async.mlir b/mlir/test/mlir-opt/async.mlir index 05f13f0710780..d5eafd1c6aeff 100644 --- a/mlir/test/mlir-opt/async.mlir +++ b/mlir/test/mlir-opt/async.mlir @@ -1,6 +1,6 @@ // Check if mlir marks the corresponding function with required coroutine attribute. 
// -// RUN: mlir-opt %s -pass-pipeline="async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts" \ +// RUN: mlir-opt %s -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),convert-linalg-to-llvm,convert-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)" \ // RUN: | FileCheck %s // CHECK: llvm.func @async_execute_fn{{.*}}attributes{{.*}}presplitcoroutine diff --git a/mlir/test/python/dialects/async_dialect.py b/mlir/test/python/dialects/async_dialect.py index 6a33bd6b6d030..da3103cecddf2 100644 --- a/mlir/test/python/dialects/async_dialect.py +++ b/mlir/test/python/dialects/async_dialect.py @@ -11,7 +11,7 @@ def run(f): def testAsyncPass(): with Context() as context: - PassManager.parse('async-to-async-runtime') + PassManager.parse('any(async-to-async-runtime)') print('SUCCESS') # CHECK-LABEL: testAsyncPass diff --git a/mlir/test/python/dialects/gpu.py b/mlir/test/python/dialects/gpu.py index edf59dfc9c8fb..38bf038a5eeed 100644 --- a/mlir/test/python/dialects/gpu.py +++ b/mlir/test/python/dialects/gpu.py @@ -11,7 +11,7 @@ def run(f): def testGPUPass(): with Context() as context: - PassManager.parse('gpu-kernel-outlining') + PassManager.parse('any(gpu-kernel-outlining)') print('SUCCESS') # CHECK-LABEL: testGPUPass diff --git a/mlir/test/python/dialects/sparse_tensor/passes.py b/mlir/test/python/dialects/sparse_tensor/passes.py index f3510c5ce8169..9319e16e054de 100644 --- a/mlir/test/python/dialects/sparse_tensor/passes.py +++ b/mlir/test/python/dialects/sparse_tensor/passes.py @@ -16,7 +16,7 @@ def run(f): @run def testSparseTensorPass(): with 
Context() as context: - PassManager.parse('sparsification') - PassManager.parse('sparse-tensor-conversion') + PassManager.parse('any(sparsification)') + PassManager.parse('any(sparse-tensor-conversion)') # CHECK: SUCCESS print('SUCCESS') diff --git a/mlir/test/python/execution_engine.py b/mlir/test/python/execution_engine.py index c518803789637..7b7ee953ea193 100644 --- a/mlir/test/python/execution_engine.py +++ b/mlir/test/python/execution_engine.py @@ -63,7 +63,7 @@ def testInvalidModule(): def lowerToLLVM(module): pm = PassManager.parse( - "convert-complex-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts") + "builtin.module(convert-complex-to-llvm,convert-memref-to-llvm,convert-func-to-llvm,reconcile-unrealized-casts)") pm.run(module) return module diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py index e22f5a0ea3d87..585741ae9336d 100644 --- a/mlir/test/python/integration/dialects/linalg/opsrun.py +++ b/mlir/test/python/integration/dialects/linalg/opsrun.py @@ -191,11 +191,17 @@ def transform(module, boilerplate): ops = module.operation.regions[0].blocks[0].operations mod = Module.parse("\n".join([str(op) for op in ops]) + boilerplate) - pm = PassManager.parse( - "func.func(convert-linalg-to-loops, lower-affine, " + - "convert-math-to-llvm, convert-scf-to-cf, arith-expand, memref-expand), " - + "convert-vector-to-llvm, convert-memref-to-llvm, convert-func-to-llvm," + - "reconcile-unrealized-casts") + pm = PassManager('builtin.module') + pm.add("func.func(convert-linalg-to-loops)") + pm.add("func.func(lower-affine)") + pm.add("func.func(convert-math-to-llvm)") + pm.add("func.func(convert-scf-to-cf)") + pm.add("func.func(arith-expand)") + pm.add("func.func(memref-expand)") + pm.add("convert-vector-to-llvm") + pm.add("convert-memref-to-llvm") + pm.add("convert-func-to-llvm") + pm.add("reconcile-unrealized-casts") pm.run(mod) return mod diff --git 
a/mlir/test/python/ir/affine_expr.py b/mlir/test/python/ir/affine_expr.py index 9854b496fe460..6a3a6fcc65e1b 100644 --- a/mlir/test/python/ir/affine_expr.py +++ b/mlir/test/python/ir/affine_expr.py @@ -116,7 +116,7 @@ def testAffineExprSymbol(): # CHECK: 2 print(s2.position) - # CHEKC: s2 + # CHECK: s2 print(s2) assert s1 == s11 diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py index a2d56a1f6e031..492c7e09ec5ae 100644 --- a/mlir/test/python/pass_manager.py +++ b/mlir/test/python/pass_manager.py @@ -28,6 +28,17 @@ def testCapsule(): assert pm1 is not None # And does not crash. run(testCapsule) +# CHECK-LABEL: TEST: testConstruct +@run +def testConstruct(): + with Context(): + # CHECK: pm1: 'any()' + # CHECK: pm2: 'builtin.module()' + pm1 = PassManager() + pm2 = PassManager("builtin.module") + log(f"pm1: '{pm1}'") + log(f"pm2: '{pm2}'") + # Verify successful round-trip. # CHECK-LABEL: TEST: testParseSuccess @@ -44,7 +55,7 @@ def testParseSuccess(): # A registered pass should parse successfully. 
pm = PassManager.parse("builtin.module(func.func(print-op-stats{json=false}))") - # CHECK: Roundtrip: builtin.module(builtin.module(func.func(print-op-stats{json=false}))) + # CHECK: Roundtrip: builtin.module(func.func(print-op-stats{json=false})) log("Roundtrip: ", pm) run(testParseSuccess) @@ -53,7 +64,7 @@ def testParseSuccess(): def testParseFail(): with Context(): try: - pm = PassManager.parse("unknown-pass") + pm = PassManager.parse("any(unknown-pass)") except ValueError as e: # CHECK: ValueError exception: MLIR Textual PassPipeline Parser:1:1: error: # CHECK-SAME: 'unknown-pass' does not refer to a registered pass or pass pipeline @@ -64,6 +75,20 @@ def testParseFail(): log("Exception not produced") run(testParseFail) +# Check that adding to a pass manager works +# CHECK-LABEL: TEST: testAdd +@run +def testAdd(): + pm = PassManager("any", Context()) + # CHECK: pm: 'any()' + log(f"pm: '{pm}'") + # CHECK: pm: 'any(cse)' + pm.add("cse") + log(f"pm: '{pm}'") + # CHECK: pm: 'any(cse,cse)' + pm.add("cse") + log(f"pm: '{pm}'") + # Verify failure on incorrect level of nesting. # CHECK-LABEL: TEST: testInvalidNesting @@ -83,7 +108,7 @@ def testInvalidNesting(): # CHECK-LABEL: TEST: testRun def testRunPipeline(): with Context(): - pm = PassManager.parse("print-op-stats{json=false}") + pm = PassManager.parse("builtin.module(print-op-stats{json=false})") module = Module.parse(r"""func.func @successfulParse() { return }""") pm.run(module) # CHECK: Operations encountered: diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index ec0e79ebdd834..f34aaa364fac9 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -457,6 +457,13 @@ void DefGen::emitKeyType() { [&](auto &param) { os << param.getCppType(); }); os << '>'; storageCls->declare("KeyTy", std::move(os.str())); + + // Add a method to construct the key type from the storage.
+ Method *m = storageCls->addConstMethod("KeyTy", "getAsKey"); + m->body().indent() << "return KeyTy("; + llvm::interleaveComma(params, m->body().indent(), + [&](auto &param) { m->body() << param.getName(); }); + m->body() << ");"; } void DefGen::emitEquals() { diff --git a/mlir/unittests/IR/SubElementInterfaceTest.cpp b/mlir/unittests/IR/SubElementInterfaceTest.cpp index 292628aad5d47..66e29d48f7f47 100644 --- a/mlir/unittests/IR/SubElementInterfaceTest.cpp +++ b/mlir/unittests/IR/SubElementInterfaceTest.cpp @@ -23,13 +23,14 @@ TEST(SubElementInterfaceTest, Nested) { BoolAttr trueAttr = builder.getBoolAttr(true); BoolAttr falseAttr = builder.getBoolAttr(false); ArrayAttr boolArrayAttr = builder.getArrayAttr({trueAttr, falseAttr}); + StringAttr strAttr = builder.getStringAttr("array"); DictionaryAttr dictAttr = - builder.getDictionaryAttr(builder.getNamedAttr("array", boolArrayAttr)); + builder.getDictionaryAttr(builder.getNamedAttr(strAttr, boolArrayAttr)); SmallVector subAttrs; dictAttr.walkSubAttrs([&](Attribute attr) { subAttrs.push_back(attr); }); EXPECT_EQ(llvm::makeArrayRef(subAttrs), - ArrayRef({trueAttr, falseAttr, boolArrayAttr})); + ArrayRef({strAttr, trueAttr, falseAttr, boolArrayAttr})); } } // namespace diff --git a/openmp/docs/SupportAndFAQ.rst b/openmp/docs/SupportAndFAQ.rst index dc1ad83902d28..8bf0ea56a7ae7 100644 --- a/openmp/docs/SupportAndFAQ.rst +++ b/openmp/docs/SupportAndFAQ.rst @@ -413,3 +413,17 @@ linkable device image. clang++ openmp.cpp -fopenmp --offload-arch=sm_80 -c clang++ cuda.cu --offload-new-driver --offload-arch=sm_80 -fgpu-rdc -c clang++ openmp.o cuda.o --offload-link -o app + +Q: Are libomptarget and plugins backward compatible? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +No. libomptarget and plugins are now built as LLVM libraries starting from LLVM +15. Because LLVM libraries are not backward compatible, libomptarget and plugins +are not either.
Given that fact, the interfaces between 1) the Clang compiler +and libomptarget, 2) the Clang compiler and device runtime library, and +3) libomptarget and plugins are not guaranteed to be compatible with an earlier +version. Users are responsible for ensuring compatibility when not using the +Clang compiler and runtime libraries from the same build. Nevertheless, in order +to better support third-party libraries and toolchains that depend on existing +libomptarget entry points, contributors are discouraged from making +modifications to them. diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp index 7cef92d304b66..512577c06f9eb 100644 --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -79,7 +79,9 @@ uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); } uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); } -uint32_t getNumberOfBlocks() { return __builtin_amdgcn_grid_size_x(); } +uint32_t getNumberOfBlocks() { + return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x(); +} uint32_t getWarpId() { return impl::getThreadIdInBlock() / mapping::getWarpSize(); diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index d65ca0b02864d..f408449f01134 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -190,7 +190,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, Rc = SrcDev.retrieveData(Buffer, SrcAddr, Length, AsyncInfo); } if (Rc == OFFLOAD_SUCCESS) { - AsyncInfoTy AsyncInfo(SrcDev); + AsyncInfoTy AsyncInfo(DstDev); Rc = DstDev.submitData(DstAddr, Buffer, Length, AsyncInfo); } free(Buffer); diff --git a/openmp/runtime/cmake/LibompHandleFlags.cmake b/openmp/runtime/cmake/LibompHandleFlags.cmake index aee9038520b8a..33847b5545fbc 100644 --- a/openmp/runtime/cmake/LibompHandleFlags.cmake +++ b/openmp/runtime/cmake/LibompHandleFlags.cmake 
@@ -100,6 +100,7 @@ function(libomp_get_ldflags ldflags) libomp_append(ldflags_local -Wl,--warn-shared-textrel LIBOMP_HAVE_WARN_SHARED_TEXTREL_FLAG) libomp_append(ldflags_local -Wl,--as-needed LIBOMP_HAVE_AS_NEEDED_FLAG) libomp_append(ldflags_local "-Wl,--version-script=${LIBOMP_SRC_DIR}/exports_so.txt" LIBOMP_HAVE_VERSION_SCRIPT_FLAG) + libomp_append(ldflags_local "-Wl,--undefined-version" LIBOMP_HAVE_UNDEFINED_VERSION_FLAG) # FIXME issue #58858 libomp_append(ldflags_local -static-libgcc LIBOMP_HAVE_STATIC_LIBGCC_FLAG) libomp_append(ldflags_local -Wl,-z,noexecstack LIBOMP_HAVE_Z_NOEXECSTACK_FLAG) libomp_append(ldflags_local -no-intel-extensions LIBOMP_HAVE_NO_INTEL_EXTENSIONS_FLAG) diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake index a57cbf9d2ef59..aa79c2a605f95 100644 --- a/openmp/runtime/cmake/config-ix.cmake +++ b/openmp/runtime/cmake/config-ix.cmake @@ -27,7 +27,7 @@ function(libomp_check_version_symbols retval) void func2() { printf(\"World\"); } __asm__(\".symver func1, func@VER1\"); __asm__(\".symver func2, func@VER2\"); - int main() { + int main(void) { func1(); func2(); return 0; @@ -133,6 +133,7 @@ elseif(NOT APPLE) libomp_check_linker_flag(-Wl,--warn-shared-textrel LIBOMP_HAVE_WARN_SHARED_TEXTREL_FLAG) libomp_check_linker_flag(-Wl,--as-needed LIBOMP_HAVE_AS_NEEDED_FLAG) libomp_check_linker_flag("-Wl,--version-script=${LIBOMP_SRC_DIR}/exports_so.txt" LIBOMP_HAVE_VERSION_SCRIPT_FLAG) + libomp_check_linker_flag("-Wl,--undefined-version" LIBOMP_HAVE_UNDEFINED_VERSION_FLAG) # FIXME issue #58858 libomp_check_linker_flag(-static-libgcc LIBOMP_HAVE_STATIC_LIBGCC_FLAG) libomp_check_linker_flag(-Wl,-z,noexecstack LIBOMP_HAVE_Z_NOEXECSTACK_FLAG) endif() diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 6f7da88f66ac1..8a2bcedb4c9aa 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -3655,6 +3655,8 @@ static inline void __kmp_assign_root_init_mask() { } } static inline void 
__kmp_reset_root_init_mask(int gtid) { + if (!KMP_AFFINITY_CAPABLE()) + return; kmp_info_t *th = __kmp_threads[gtid]; kmp_root_t *r = th->th.th_root; if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) { diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index e9d0b99f6417e..43bf79403f44a 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -675,7 +675,11 @@ void kmp_topology_t::print(const char *env_var) const { kmp_hw_t print_types[KMP_HW_LAST + 2]; // Num Available Threads - KMP_INFORM(AvailableOSProc, env_var, num_hw_threads); + if (num_hw_threads) { + KMP_INFORM(AvailableOSProc, env_var, num_hw_threads); + } else { + KMP_INFORM(AvailableOSProc, env_var, __kmp_xproc); + } // Uniform or not if (is_uniform()) { @@ -3062,7 +3066,8 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line, } // Skip this proc if it is not included in the machine model. - if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], + if (KMP_AFFINITY_CAPABLE() && + !KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) { INIT_PROC_INFO(threadInfo[num_avail]); continue; @@ -4525,6 +4530,9 @@ void __kmp_affinity_uninitialize(void) { *affinity = KMP_AFFINITY_INIT(affinity->env_var); } if (__kmp_affin_origMask != NULL) { + if (KMP_AFFINITY_CAPABLE()) { + __kmp_set_system_affinity(__kmp_affin_origMask, FALSE); + } KMP_CPU_FREE(__kmp_affin_origMask); __kmp_affin_origMask = NULL; } diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index 080f4015b6e06..88eb150fc0c2a 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -6296,6 +6296,15 @@ void __kmp_env_initialize(char const *string) { __kmp_affinity_top_method = affinity_top_method_all; } } + } else { + // If affinity is disabled, then still need to assign topology method + // to attempt machine detection and affinity types + if (__kmp_affinity_top_method == 
affinity_top_method_default) + __kmp_affinity_top_method = affinity_top_method_all; + if (__kmp_affinity.type == affinity_default) + __kmp_affinity.type = affinity_disabled; + if (__kmp_hh_affinity.type == affinity_default) + __kmp_hh_affinity.type = affinity_disabled; } #ifdef KMP_DEBUG diff --git a/openmp/runtime/test/affinity/disabled.c b/openmp/runtime/test/affinity/disabled.c new file mode 100644 index 0000000000000..18261010a096a --- /dev/null +++ b/openmp/runtime/test/affinity/disabled.c @@ -0,0 +1,25 @@ +// RUN: %libomp-compile +// RUN: env KMP_AFFINITY=disabled %libomp-run +// RUN: env KMP_AFFINITY=disabled,reset %libomp-run +// REQUIRES: affinity +#include +#include +#include + +int main() { + int nthreads, correct_value;; + int a = 0; + #pragma omp parallel reduction(+: a) + { + a += omp_get_thread_num(); + #pragma omp single + nthreads = omp_get_num_threads(); + } + correct_value = nthreads * (nthreads - 1) / 2; + if (a != correct_value) { + printf("Incorrect value: %d should be %d\n", a, correct_value); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt index 2f912e7daeb21..c0a5b32e283f2 100644 --- a/polly/lib/External/CMakeLists.txt +++ b/polly/lib/External/CMakeLists.txt @@ -64,7 +64,7 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" ${_includes} ${_type} typeVar; - int main() { + int main(void) { return 0; } " ${_variable}) @@ -73,7 +73,7 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" int func(void) __attribute__((__warn_unused_result__)); - int main() { return 0; } + int main(void) { return 0; } " HAS_ATTRIBUTE_WARN_UNUSED_RESULT) set(GCC_WARN_UNUSED_RESULT) if (HAS_ATTRIBUTE_WARN_UNUSED_RESULT) @@ -82,22 +82,22 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles(" __attribute__ ((unused)) static void foo(void); - int main() { return 0; } + int main(void) { return 0; } " HAVE___ATTRIBUTE__) check_c_source_compiles_numeric(" #include - int main() { 
(void)ffs(0); return 0; } + int main(void) { (void)ffs(0); return 0; } " HAVE_DECL_FFS) check_c_source_compiles_numeric(" - int main() { (void)__builtin_ffs(0); return 0; } + int main(void) { (void)__builtin_ffs(0); return 0; } " HAVE_DECL___BUILTIN_FFS) check_c_source_compiles_numeric(" #include - int main() { (void)_BitScanForward(NULL, 0); return 0; } + int main(void) { (void)_BitScanForward(NULL, 0); return 0; } " HAVE_DECL__BITSCANFORWARD) if (NOT HAVE_DECL_FFS AND @@ -109,12 +109,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { (void)strcasecmp(\"\", \"\"); return 0; } + int main(void) { (void)strcasecmp(\"\", \"\"); return 0; } " HAVE_DECL_STRCASECMP) check_c_source_compiles_numeric(" #include - int main() { (void)_stricmp(\"\", \"\"); return 0; } + int main(void) { (void)_stricmp(\"\", \"\"); return 0; } " HAVE_DECL__STRICMP) if (NOT HAVE_DECL_STRCASECMP AND NOT HAVE_DECL__STRICMP) @@ -124,12 +124,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { (void)strncasecmp(\"\", \"\", 0); return 0; } + int main(void) { (void)strncasecmp(\"\", \"\", 0); return 0; } " HAVE_DECL_STRNCASECMP) check_c_source_compiles_numeric(" #include - int main() { (void)_strnicmp(\"\", \"\", 0); return 0; } + int main(void) { (void)_strnicmp(\"\", \"\", 0); return 0; } " HAVE_DECL__STRNICMP) if (NOT HAVE_DECL_STRNCASECMP AND NOT HAVE_DECL__STRNICMP) @@ -139,12 +139,12 @@ if (POLLY_BUNDLED_ISL) check_c_source_compiles_numeric(" #include - int main() { snprintf((void*)0, 0, \" \"); return 0; } + int main(void) { snprintf((void*)0, 0, \" \"); return 0; } " HAVE_DECL_SNPRINTF) check_c_source_compiles_numeric(" #include - int main() { _snprintf((void*)0, 0, \" \"); return 0; } + int main(void) { _snprintf((void*)0, 0, \" \"); return 0; } " HAVE_DECL__SNPRINTF) if (NOT HAVE_DECL_SNPRINTF AND NOT HAVE_DECL__SNPRINTF) diff --git a/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel index d2ffeedec1967..bc2dd02e653e8 100644 --- a/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel @@ -2,6 +2,8 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") + package( default_visibility = ["//visibility:public"], ) @@ -91,14 +93,24 @@ cc_library( ], ) +expand_template( + name = "RuntimeLibraryVariables_inc", + out = "include/bolt/RuntimeLibs/RuntimeLibraryVariables.inc", + substitutions = { + # FIXME this is a total guess + "@LLVM_LIBDIR_SUFFIX@": "lib", + }, + template = "include/bolt/RuntimeLibs/RuntimeLibraryVariables.inc.in", +) + cc_library( name = "RuntimeLibs", srcs = glob([ "lib/RuntimeLibs/*.cpp", ]), - hdrs = glob([ + textual_hdrs = glob([ "include/bolt/RuntimeLibs/*.h", - ]), + ]) + ["include/bolt/RuntimeLibs/RuntimeLibraryVariables.inc"], includes = ["include"], deps = [ ":Core", diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 2a81792db347c..abcb4cd136520 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -183,6 +183,7 @@ cc_library( name = "__support_builtin_wrappers", hdrs = ["src/__support/builtin_wrappers.h"], deps = [ + ":__support_cpp_type_traits", ":libc_root", ], ) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 010fb851956d3..d5f2aabf2dcaa 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1349,7 +1349,9 @@ td_library( ], includes = ["include"], deps = [ + ":CallInterfacesTdFiles", ":ControlFlowInterfacesTdFiles", + ":FunctionInterfacesTdFiles", ":InferTypeOpInterfaceTdFiles", ":OpBaseTdFiles", 
":SideEffectInterfacesTdFiles", @@ -3998,11 +4000,10 @@ cc_library( ":GPUDialect", ":IR", ":LLVMCommonConversion", + ":LLVMDialect", ":NVGPUDialect", ":NVVMDialect", ":Pass", - ":Transforms", - "//llvm:Support", ], ) @@ -5921,6 +5922,17 @@ cc_library( ], ) +cc_library( + name = "ArithAttrToLLVMConversion", + srcs = glob(["lib/Conversion/ArithCommon/*.cpp"]), + hdrs = glob(["include/mlir/Conversion/ArithCommon/*.h"]), + includes = ["include"], + deps = [ + ":ArithDialect", + ":LLVMDialect", + ], +) + cc_library( name = "ArithToLLVM", srcs = glob(["lib/Conversion/ArithToLLVM/*.cpp"]), @@ -5928,6 +5940,7 @@ cc_library( includes = ["include"], deps = [ ":Analysis", + ":ArithAttrToLLVMConversion", ":ArithDialect", ":ConversionPassIncGen", ":IR", @@ -5966,6 +5979,7 @@ cc_library( includes = ["include"], deps = [ ":Analysis", + ":ArithAttrToLLVMConversion", ":ConversionPassIncGen", ":DataLayoutInterfaces", ":IR", @@ -9275,6 +9289,7 @@ td_library( ], includes = ["include"], deps = [ + ":ArithOpsTdFiles", ":InferTypeOpInterfaceTdFiles", ":OpBaseTdFiles", ":SideEffectInterfacesTdFiles", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 2c819c3181e98..0cc19721567b0 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -67,6 +67,7 @@ cc_library( srcs = glob( [ "lib/Analysis/*.cpp", + "lib/Analysis/*.h", "lib/Analysis/DataFlow/*.cpp", ], ), diff --git a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel index bbf8a100475a5..6e997e9f47e77 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/Dialect/BUILD.bazel @@ -11,6 +11,7 @@ package(default_visibility = ["//visibility:public"]) data = [ "//llvm:llvm-symbolizer", "//mlir:mlir-opt", + "//mlir:mlir-translate", 
"//mlir/test:lit_data", ], )